|
1 | 1 | PAGE = u""" |
2 | | -<style id="scrapy-style" type="text/css">@import url(http://localhost:8000/as/site_media/clean.css); |
3 | | -</style> |
| 2 | +<style id="scrapy-style" type="text/css">@import url(http://localhost:8000/as/site_media/clean.css); |
| 3 | + </style> |
4 | 4 | <body> |
5 | 5 | <div class="scrapy-selected" id="header"> |
6 | 6 | <img src="company_logo.jpg" style="margin-left: 68px; padding-top:5px;" alt="Logo" width="530" height="105"> |
|
152 | 152 | {'end': 150, 'start': 149}, |
153 | 153 | ] |
154 | 154 |
|
155 | | -# for testing tags inside comments |
156 | | -PAGE3 = u"""<html><body><h1>Helloooo!!</h1><p>Did i say hello??</p><!--<p> |
157 | | -</p>--><script type="text/javascript">bla<!--comment-->blabla</script></body></html>""" |
| 155 | +# for testing tags in different forms |
| 156 | +PAGE3 = u"""<!DOCTYPE html> |
| 157 | +<html> |
| 158 | + <head> |
| 159 | + <!-- Standard comment style --> |
| 160 | + <title>Page name</title> |
| 161 | + <meta name="name" content="value"><!> <!-- <- Self Closing Comment --!> |
| 162 | + </head> |
| 163 | +
|
| 164 | + <!-- Comment used for ignoring a script |
| 165 | + <script type="text/javascript"> |
| 166 | + var a = 1; |
| 167 | + </script> |
| 168 | + --> |
| 169 | + <body> |
| 170 | + </body> |
| 171 | +</html> |
| 172 | +""" |
158 | 173 |
|
159 | 174 | PARSED3 = [ |
160 | | - {'attributes': {}, 'end': 6, 'start': 0, 'tag': u'html', 'tag_type': 1}, |
161 | | - {'attributes': {}, 'end': 12, 'start': 6, 'tag': u'body', 'tag_type': 1}, |
162 | | - {'attributes': {}, 'end': 16, 'start': 12, 'tag': u'h1', 'tag_type': 1}, |
163 | | - {'end': 26, 'start': 16}, |
164 | | - {'attributes': {}, 'end': 31, 'start': 26, 'tag': u'h1', 'tag_type': 2}, |
165 | | - {'attributes': {}, 'end': 34, 'start': 31, 'tag': u'p', 'tag_type': 1}, |
166 | | - {'end': 51, 'start': 34}, |
167 | | - {'attributes': {}, 'end': 55, 'start': 51, 'tag': u'p', 'tag_type': 2}, |
168 | | - {'end': 70, 'start': 55, 'is_text_content': False}, |
169 | | - {'attributes': {u'type': u'text/javascript'}, 'end': 101, 'start': 70, 'tag': u'script', 'tag_type': 1}, |
170 | | - {'end': 104, 'start': 101, 'is_text_content': False}, |
171 | | - {'end': 118, 'start': 104, 'is_text_content': False}, |
172 | | - {'end': 124, 'start': 118, 'is_text_content': False}, |
173 | | - {'attributes': {}, 'end': 133, 'start': 124, 'tag': u'script', 'tag_type': 2}, |
174 | | - {'attributes': {}, 'end': 140, 'start': 133, 'tag': u'body', 'tag_type': 2}, |
175 | | - {'attributes': {}, 'end': 147, 'start': 140, 'tag': u'html', 'tag_type': 2} |
| 175 | + {'end': 16, 'start': 15, 'is_text_content': True}, |
| 176 | + {'end': 22, 'start': 16, 'attributes': {}, 'tag_type': 1, 'is_text_content': False, 'tag': 'html'}, |
| 177 | + {'end': 27, 'start': 22, 'is_text_content': True}, |
| 178 | + {'end': 33, 'start': 27, 'attributes': {}, 'tag_type': 1, 'is_text_content': False, 'tag': 'head'}, |
| 179 | + {'end': 38, 'start': 33, 'is_text_content': True}, |
| 180 | + {'end': 69, 'start': 38, 'is_text_content': False}, |
| 181 | + {'end': 74, 'start': 69, 'is_text_content': True}, |
| 182 | + {'end': 81, 'start': 74, 'attributes': {}, 'tag_type': 1, 'is_text_content': False, 'tag': 'title'}, |
| 183 | + {'end': 90, 'start': 81, 'is_text_content': True}, |
| 184 | + {'end': 98, 'start': 90, 'attributes': {}, 'tag_type': 2, 'is_text_content': False, 'tag': 'title'}, |
| 185 | + {'end': 103, 'start': 98, 'is_text_content': True}, |
| 186 | + {'end': 137, 'start': 103, 'attributes': {'content': 'value', 'name': 'name'}, 'tag_type': 1, 'is_text_content': False, 'tag': 'meta'}, |
| 187 | + {'end': 140, 'start': 137, 'is_text_content': False}, |
| 188 | + {'end': 141, 'start': 140, 'is_text_content': True}, |
| 189 | + {'end': 174, 'start': 141, 'is_text_content': False}, |
| 190 | + {'end': 179, 'start': 174, 'is_text_content': True}, |
| 191 | + {'end': 186, 'start': 179, 'attributes': {}, 'tag_type': 2, 'is_text_content': False, 'tag': 'head'}, |
| 192 | + {'end': 192, 'start': 186, 'is_text_content': True}, |
| 193 | + {'end': 320, 'start': 192, 'is_text_content': False}, |
| 194 | + {'end': 325, 'start': 320, 'is_text_content': True}, |
| 195 | + {'end': 331, 'start': 325, 'attributes': {}, 'tag_type': 1, 'is_text_content': False, 'tag': 'body'}, |
| 196 | + {'end': 336, 'start': 331, 'is_text_content': True}, |
| 197 | + {'end': 343, 'start': 336, 'attributes': {}, 'tag_type': 2, 'is_text_content': False, 'tag': 'body'}, |
| 198 | + {'end': 344, 'start': 343, 'is_text_content': True}, |
| 199 | + {'end': 351, 'start': 344, 'attributes': {}, 'tag_type': 2, 'is_text_content': False, 'tag': 'html'}, |
| 200 | + {'end': 352, 'start': 351, 'is_text_content': True} |
176 | 201 | ] |
177 | 202 |
|
178 | 203 | # for testing tags inside scripts |
|
293 | 318 | {"attributes": {}, "end": 91, "start": 84, "tag": "body", "tag_type": 2}, |
294 | 319 | {"attributes": {}, "end": 98, "start": 91, "tag": "html", "tag_type": 2} |
295 | 320 | ] |
296 | | - |
|
0 commit comments