1616
1717abstract class AbstractCrawlerTest extends TestCase
1818{
19- /**
20- * @param mixed $node
21- * @param string|null $uri
22- * @param string|null $baseHref
23- *
24- * @return Crawler
25- */
26- abstract public function createCrawler ($ node = null , string $ uri = null , string $ baseHref = null );
19+ abstract public function getDoctype (): string ;
20+
21+ protected function createCrawler ($ node = null , string $ uri = null , string $ baseHref = null )
22+ {
23+ return new Crawler ($ node , $ uri , $ baseHref );
24+ }
2725
2826 public function testConstructor ()
2927 {
@@ -74,7 +72,7 @@ public function testAdd()
7472 $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->add() adds nodes from a \DOMNode ' );
7573
7674 $ crawler = $ this ->createCrawler ();
77- $ crawler ->add ('<html><body>Foo</body></html> ' );
75+ $ crawler ->add ($ this -> getDoctype (). '<html><body>Foo</body></html> ' );
7876 $ this ->assertEquals ('Foo ' , $ crawler ->filterXPath ('//body ' )->text (), '->add() adds nodes from a string ' );
7977 }
8078
@@ -94,22 +92,21 @@ public function testAddInvalidType()
9492 public function testAddMultipleDocumentNode ()
9593 {
9694 $ crawler = $ this ->createTestCrawler ();
97- $ crawler ->addHtmlContent ('<html><div class="foo"></html> ' , 'UTF-8 ' );
95+ $ crawler ->addHtmlContent ($ this -> getDoctype (). '<html><div class="foo"></html> ' , 'UTF-8 ' );
9896 }
9997
10098 public function testAddHtmlContent ()
10199 {
102100 $ crawler = $ this ->createCrawler ();
103- $ crawler ->addHtmlContent ('<html><div class="foo"></html> ' , 'UTF-8 ' );
101+ $ crawler ->addHtmlContent ($ this -> getDoctype (). '<html><div class="foo"></html> ' , 'UTF-8 ' );
104102
105103 $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addHtmlContent() adds nodes from an HTML string ' );
106104 }
107105
108106 public function testAddHtmlContentWithBaseTag ()
109107 {
110108 $ crawler = $ this ->createCrawler ();
111-
112- $ crawler ->addHtmlContent ('<html><head><base href="http://symfony.com"></head><a href="/contact"></a></html> ' , 'UTF-8 ' );
109+ $ crawler ->addHtmlContent ($ this ->getDoctype ().'<html><head><base href="http://symfony.com"></head><a href="/contact"></a></html> ' , 'UTF-8 ' );
113110
114111 $ this ->assertEquals ('http://symfony.com ' , $ crawler ->filterXPath ('//base ' )->attr ('href ' ), '->addHtmlContent() adds nodes from an HTML string ' );
115112 $ this ->assertEquals ('http://symfony.com/contact ' , $ crawler ->filterXPath ('//a ' )->link ()->getUri (), '->addHtmlContent() adds nodes from an HTML string ' );
@@ -121,15 +118,15 @@ public function testAddHtmlContentWithBaseTag()
121118 public function testAddHtmlContentCharset ()
122119 {
123120 $ crawler = $ this ->createCrawler ();
124- $ crawler ->addHtmlContent ('<html><div class="foo">Tiếng Việt</html> ' , 'UTF-8 ' );
121+ $ crawler ->addHtmlContent ($ this -> getDoctype (). '<html><div class="foo">Tiếng Việt</html> ' , 'UTF-8 ' );
125122
126123 $ this ->assertEquals ('Tiếng Việt ' , $ crawler ->filterXPath ('//div ' )->text ());
127124 }
128125
129126 public function testAddHtmlContentInvalidBaseTag ()
130127 {
131128 $ crawler = $ this ->createCrawler (null , 'http://symfony.com ' );
132- $ crawler ->addHtmlContent ('<html><head><base target="_top"></head><a href="/contact"></a></html> ' , 'UTF-8 ' );
129+ $ crawler ->addHtmlContent ($ this -> getDoctype (). '<html><head><base target="_top"></head><a href="/contact"></a></html> ' , 'UTF-8 ' );
133130
134131 $ this ->assertEquals ('http://symfony.com/contact ' , current ($ crawler ->filterXPath ('//a ' )->links ())->getUri (), '->addHtmlContent() correctly handles a non-existent base tag href attribute ' );
135132 }
@@ -141,55 +138,55 @@ public function testAddHtmlContentCharsetGbk()
141138 {
142139 $ crawler = $ this ->createCrawler ();
143140 //gbk encode of <html><p>中文</p></html>
144- $ crawler ->addHtmlContent (base64_decode ('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+ ' ), 'gbk ' );
141+ $ crawler ->addHtmlContent ($ this -> getDoctype (). base64_decode ('PGh0bWw+PHA+1tDOxDwvcD48L2h0bWw+ ' ), 'gbk ' );
145142
146143 $ this ->assertEquals ('中文 ' , $ crawler ->filterXPath ('//p ' )->text ());
147144 }
148145
149146 public function testAddXmlContent ()
150147 {
151148 $ crawler = $ this ->createCrawler ();
152- $ crawler ->addXmlContent ('<html><div class="foo"></div></html> ' , 'UTF-8 ' );
149+ $ crawler ->addXmlContent ($ this -> getDoctype (). '<html><div class="foo"></div></html> ' , 'UTF-8 ' );
153150
154151 $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addXmlContent() adds nodes from an XML string ' );
155152 }
156153
157154 public function testAddXmlContentCharset ()
158155 {
159156 $ crawler = $ this ->createCrawler ();
160- $ crawler ->addXmlContent ('<html><div class="foo">Tiếng Việt</div></html> ' , 'UTF-8 ' );
157+ $ crawler ->addXmlContent ($ this -> getDoctype (). '<html><div class="foo">Tiếng Việt</div></html> ' , 'UTF-8 ' );
161158
162159 $ this ->assertEquals ('Tiếng Việt ' , $ crawler ->filterXPath ('//div ' )->text ());
163160 }
164161
165162 public function testAddContent ()
166163 {
167164 $ crawler = $ this ->createCrawler ();
168- $ crawler ->addContent ('<html><div class="foo"></html> ' , 'text/html; charset=UTF-8 ' );
165+ $ crawler ->addContent ($ this -> getDoctype (). '<html><div class="foo"></html> ' , 'text/html; charset=UTF-8 ' );
169166 $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addContent() adds nodes from an HTML string ' );
170167
171168 $ crawler = $ this ->createCrawler ();
172- $ crawler ->addContent ('<html><div class="foo"></html> ' , 'text/html; charset=UTF-8; dir=RTL ' );
169+ $ crawler ->addContent ($ this -> getDoctype (). '<html><div class="foo"></html> ' , 'text/html; charset=UTF-8; dir=RTL ' );
173170 $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addContent() adds nodes from an HTML string with extended content type ' );
174171
175172 $ crawler = $ this ->createCrawler ();
176- $ crawler ->addContent ('<html><div class="foo"></html> ' );
173+ $ crawler ->addContent ($ this -> getDoctype (). '<html><div class="foo"></html> ' );
177174 $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addContent() uses text/html as the default type ' );
178175
179176 $ crawler = $ this ->createCrawler ();
180- $ crawler ->addContent ('<html><div class="foo"></div></html> ' , 'text/xml; charset=UTF-8 ' );
177+ $ crawler ->addContent ($ this -> getDoctype (). '<html><div class="foo"></div></html> ' , 'text/xml; charset=UTF-8 ' );
181178 $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addContent() adds nodes from an XML string ' );
182179
183180 $ crawler = $ this ->createCrawler ();
184- $ crawler ->addContent ('<html><div class="foo"></div></html> ' , 'text/xml ' );
181+ $ crawler ->addContent ($ this -> getDoctype (). '<html><div class="foo"></div></html> ' , 'text/xml ' );
185182 $ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addContent() adds nodes from an XML string ' );
186183
187184 $ crawler = $ this ->createCrawler ();
188185 $ crawler ->addContent ('foo bar ' , 'text/plain ' );
189186 $ this ->assertCount (0 , $ crawler , '->addContent() does nothing if the type is not (x|ht)ml ' );
190187
191188 $ crawler = $ this ->createCrawler ();
192- $ crawler ->addContent ('<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html> ' );
189+ $ crawler ->addContent ($ this -> getDoctype (). '<html><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><span>中文</span></html> ' );
193190 $ this ->assertEquals ('中文 ' , $ crawler ->filterXPath ('//span ' )->text (), '->addContent() guess wrong charset ' );
194191 }
195192
@@ -199,7 +196,7 @@ public function testAddContent()
199196 public function testAddContentNonUtf8 ()
200197 {
201198 $ crawler = $ this ->createCrawler ();
202- $ crawler ->addContent (iconv ('UTF-8 ' , 'SJIS ' , '<html><head><meta charset="Shift_JIS"></head><body>日本語</body></html> ' ));
199+ $ crawler ->addContent (iconv ('UTF-8 ' , 'SJIS ' , $ this -> getDoctype (). '<html><head><meta charset="Shift_JIS"></head><body>日本語</body></html> ' ));
203200 $ this ->assertEquals ('日本語 ' , $ crawler ->filterXPath ('//body ' )->text (), '->addContent() can recognize "Shift_JIS" in html5 meta charset tag ' );
204201 }
205202
@@ -314,7 +311,7 @@ public function testAttr()
314311 public function testMissingAttrValueIsNull ()
315312 {
316313 $ crawler = $ this ->createCrawler ();
317- $ crawler ->addContent ('<html><div non-empty-attr="sample value" empty-attr=""></div></html> ' , 'text/html; charset=UTF-8 ' );
314+ $ crawler ->addContent ($ this -> getDoctype (). '<html><div non-empty-attr="sample value" empty-attr=""></div></html> ' , 'text/html; charset=UTF-8 ' );
318315 $ div = $ crawler ->filterXPath ('//div ' );
319316
320317 $ this ->assertEquals ('sample value ' , $ div ->attr ('non-empty-attr ' ), '->attr() reads non-empty attributes correctly ' );
@@ -670,7 +667,6 @@ public function testSelectButton()
670667 public function testSelectButtonWithSingleQuotesInNameAttribute ()
671668 {
672669 $ html = <<<'HTML'
673- <!DOCTYPE html>
674670<html lang="en">
675671<body>
676672 <div id="action">
@@ -683,15 +679,14 @@ public function testSelectButtonWithSingleQuotesInNameAttribute()
683679</html>
684680HTML;
685681
686- $ crawler = $ this ->createCrawler ($ html );
682+ $ crawler = $ this ->createCrawler ($ this -> getDoctype (). $ html );
687683
688684 $ this ->assertCount (1 , $ crawler ->selectButton ('Click \'Here \'' ));
689685 }
690686
691687 public function testSelectButtonWithDoubleQuotesInNameAttribute ()
692688 {
693689 $ html = <<<'HTML'
694- <!DOCTYPE html>
695690<html lang="en">
696691<body>
697692 <div id="action">
@@ -704,7 +699,7 @@ public function testSelectButtonWithDoubleQuotesInNameAttribute()
704699</html>
705700HTML;
706701
707- $ crawler = $ this ->createCrawler ($ html );
702+ $ crawler = $ this ->createCrawler ($ this -> getDoctype (). $ html );
708703
709704 $ this ->assertCount (1 , $ crawler ->selectButton ('Click "Here" ' ));
710705 }
@@ -763,7 +758,6 @@ public function testImage()
763758 public function testSelectLinkAndLinkFiltered ()
764759 {
765760 $ html = <<<'HTML'
766- <!DOCTYPE html>
767761<html lang="en">
768762<body>
769763 <div id="action">
@@ -776,7 +770,7 @@ public function testSelectLinkAndLinkFiltered()
776770</html>
777771HTML;
778772
779- $ crawler = $ this ->createCrawler ($ html );
773+ $ crawler = $ this ->createCrawler ($ this -> getDoctype (). $ html );
780774 $ filtered = $ crawler ->filterXPath ("descendant-or-self::*[@id = 'login-form'] " );
781775
782776 $ this ->assertCount (0 , $ filtered ->selectLink ('Login ' ));
@@ -793,7 +787,7 @@ public function testSelectLinkAndLinkFiltered()
793787
794788 public function testChaining ()
795789 {
796- $ crawler = $ this ->createCrawler ('<div name="a"><div name="b"><div name="c"></div></div></div> ' );
790+ $ crawler = $ this ->createCrawler ($ this -> getDoctype (). '<div name="a"><div name="b"><div name="c"></div></div></div> ' );
797791
798792 $ this ->assertEquals ('a ' , $ crawler ->filterXPath ('//div ' )->filterXPath ('div ' )->filterXPath ('div ' )->attr ('name ' ));
799793 }
@@ -965,7 +959,6 @@ public function testChildren()
965959 public function testFilteredChildren ()
966960 {
967961 $ html = <<<'HTML'
968- <!DOCTYPE html>
969962<html lang="en">
970963<body>
971964 <div id="foo">
@@ -981,7 +974,7 @@ public function testFilteredChildren()
981974</html>
982975HTML;
983976
984- $ crawler = $ this ->createCrawler ($ html );
977+ $ crawler = $ this ->createCrawler ($ this -> getDoctype (). $ html );
985978 $ foo = $ crawler ->filter ('#foo ' );
986979
987980 $ this ->assertEquals (3 , $ foo ->children ()->count ());
@@ -1018,7 +1011,7 @@ public function testParents()
10181011 */
10191012 public function testBaseTag ($ baseValue , $ linkValue , $ expectedUri , $ currentUri = null , $ description = '' )
10201013 {
1021- $ crawler = $ this ->createCrawler ('<html><base href=" ' .$ baseValue .'"><a href=" ' .$ linkValue .'"></a></html> ' , $ currentUri );
1014+ $ crawler = $ this ->createCrawler ($ this -> getDoctype (). '<html><base href=" ' .$ baseValue .'"><a href=" ' .$ linkValue .'"></a></html> ' , $ currentUri );
10221015 $ this ->assertEquals ($ expectedUri , $ crawler ->filterXPath ('//a ' )->link ()->getUri (), $ description );
10231016 }
10241017
@@ -1038,7 +1031,7 @@ public function getBaseTagData()
10381031 */
10391032 public function testBaseTagWithForm ($ baseValue , $ actionValue , $ expectedUri , $ currentUri = null , $ description = null )
10401033 {
1041- $ crawler = $ this ->createCrawler ('<html><base href=" ' .$ baseValue .'"><form method="post" action=" ' .$ actionValue .'"><button type="submit" name="submit"/></form></html> ' , $ currentUri );
1034+ $ crawler = $ this ->createCrawler ($ this -> getDoctype (). '<html><base href=" ' .$ baseValue .'"><form method="post" action=" ' .$ actionValue .'"><button type="submit" name="submit"/></form></html> ' , $ currentUri );
10421035 $ this ->assertEquals ($ expectedUri , $ crawler ->filterXPath ('//button ' )->form ()->getUri (), $ description );
10431036 }
10441037
@@ -1113,7 +1106,7 @@ public function testEvaluateThrowsAnExceptionIfDocumentIsEmpty()
11131106 public function testInheritedClassCallChildrenWithoutArgument ()
11141107 {
11151108 $ dom = new \DOMDocument ();
1116- $ dom ->loadHTML ('
1109+ $ dom ->loadHTML ($ this -> getDoctype (). '
11171110 <html>
11181111 <body>
11191112 <a href="foo">Foo</a>
@@ -1165,15 +1158,15 @@ public function testInheritedClassCallChildrenWithoutArgument()
11651158 public function testAddHtmlContentUnsupportedCharset ()
11661159 {
11671160 $ crawler = $ this ->createCrawler ();
1168- $ crawler ->addHtmlContent (file_get_contents (__DIR__ .'/Fixtures/windows-1250.html ' ), 'Windows-1250 ' );
1161+ $ crawler ->addHtmlContent ($ this -> getDoctype (). file_get_contents (__DIR__ .'/Fixtures/windows-1250.html ' ), 'Windows-1250 ' );
11691162
11701163 $ this ->assertEquals ('Žťčýů ' , $ crawler ->filterXPath ('//p ' )->text ());
11711164 }
11721165
11731166 public function createTestCrawler ($ uri = null )
11741167 {
11751168 $ dom = new \DOMDocument ();
1176- $ dom ->loadHTML ('
1169+ $ dom ->loadHTML ($ this -> getDoctype (). '
11771170 <html>
11781171 <body>
11791172 <a href="foo">Foo</a>
0 commit comments