|
178 | 178 | </body></html> |
179 | 179 | """ |
180 | 180 |
|
| 181 | +LABELLED_PAGE12 = u""" |
| 182 | +<head> |
| 183 | +<meta name="description" content="This is the description" data-scrapy-annotate="{&quot;variant&quot;: 0, &quot;generated&quot;: false, &quot;text-content&quot;: &quot;text-content:&quot;, &quot;annotations&quot;: {&quot;content&quot;: &quot;description&quot;}}" /> |
| 184 | +</head> |
| 185 | +""" |
| 186 | + |
| 187 | +LABELLED_PAGE13 = u""" |
| 188 | +<head> |
| 189 | +<meta name="description" content="This is the description" data-scrapy-annotate="{&quot;variant&quot;: 0, &quot;generated&quot;: false, &quot;text-content&quot;: &quot;text-content&quot;, &quot;annotations&quot;: {&quot;content&quot;: &quot;description&quot;, &quot;text-content&quot;: &quot;name&quot;}}">This is the name</meta> |
| 190 | +</head> |
| 191 | +""" |
| 192 | + |
181 | 193 | def _parse_page(parser_class, pagetext): |
182 | 194 | htmlpage = HtmlPage(None, {}, pagetext) |
183 | 195 | parser = parser_class(TokenDict()) |
@@ -304,6 +316,22 @@ def test_variant_attribute(self): |
304 | 316 | annotations = _parse_page(TemplatePageParser, LABELLED_PAGE11).annotations |
305 | 317 | self.assertEqual(annotations[0].variant_id, 1) |
306 | 318 |
|
| 319 | + def test_content_attribute(self): |
| 320 | + """ |
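| 321 | + Test that an annotation on an attribute named "content" is interpreted unambiguously as a tag attribute annotation, not as an annotation of the element's text content |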
| 322 | + """ |
| 323 | + annotations = _parse_page(TemplatePageParser, LABELLED_PAGE12).annotations |
| 324 | + self.assertEqual(annotations[0].surrounds_attribute, None) |
| 325 | + self.assertEqual(annotations[0].tag_attributes, [("content", "description")]) |
| 326 | + |
| 327 | + def test_content_and_content_attribute(self): |
| 328 | + """ |
| 329 | + Test that an attribute named "content" and the element's own text content can both be annotated on the same tag and are interpreted unambiguously |
| 330 | + """ |
| 331 | + annotations = _parse_page(TemplatePageParser, LABELLED_PAGE13).annotations |
| 332 | + self.assertEqual(annotations[0].surrounds_attribute, 'name') |
| 333 | + self.assertEqual(annotations[0].tag_attributes, [("content", "description")]) |
| 334 | + |
307 | 335 | def test_site_pages(self): |
308 | 336 | """ |
309 | 337 | Tests from real pages. More reliable and easy to build for more complicated structures |
|
0 commit comments