|
4 | 4 | Page parsing effectiveness is measured through the evaluation system. These |
5 | 5 | tests should focus on checking that specific bits of functionality work correctly. |
6 | 6 | """ |
7 | | -from functools import partial |
| 7 | +from unittest import TestCase |
| 8 | +from nose_parameterized import parameterized |
8 | 9 |
|
9 | 10 | from scrapely.htmlpage import HtmlPage |
10 | 11 | from scrapely.descriptor import (FieldDescriptor as A, |
|
1288 | 1289 | ), |
1289 | 1290 | ] |
1290 | 1291 |
|
1291 | | -def _run_extraction(name, templates, page, descriptor, expected_output): |
1292 | | - template_pages = [HtmlPage(None, {}, t) for t in templates] |
1293 | | - |
1294 | | - extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages]) |
1295 | | - actual_output, _ = extractor.extract(HtmlPage(None, {}, page)) |
1296 | | - if actual_output is None: |
1297 | | - assert not expected_output, "failed to extract data for test '%s'" % name |
1298 | | - return |
1299 | | - else: |
1300 | | - actual_output = actual_output[0] |
1301 | | - expected_names = set(expected_output.keys()) |
1302 | | - actual_names = set(actual_output.keys()) |
1303 | | - |
1304 | | - missing_in_output = filter(None, expected_names - actual_names) |
1305 | | - error = "attributes '%s' were expected but were not present in test '%s'" % \ |
1306 | | - ("', '".join(missing_in_output), name) |
1307 | | - assert not missing_in_output, error |
1308 | | - |
1309 | | - unexpected = actual_names - expected_names |
1310 | | - error = "unexpected attributes %s in test '%s'" % \ |
1311 | | - (', '.join(unexpected), name) |
1312 | | - assert not unexpected, error |
1313 | | - |
1314 | | - for k, v in expected_output.items(): |
1315 | | - extracted = actual_output[k] |
1316 | | - assert v == extracted, "in test '%s' for attribute '%s', " \ |
1317 | | - "expected value '%s' but got '%s'" % (name, k, v, extracted) |
1318 | | - |
1319 | | -def test_generator(): |
1320 | | - for data in TEST_DATA: |
1321 | | - yield partial(_run_extraction, *data) |
| 1292 | + |
| 1293 | + |
| 1294 | +class TestExtraction(TestCase): |
| 1295 | + @parameterized.expand(TEST_DATA) |
| 1296 | + def test_extraction(self, name, templates, page, descriptor, expected_output): |
| 1297 | + template_pages = [HtmlPage(None, {}, t) for t in templates] |
| 1298 | + |
| 1299 | + extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages]) |
| 1300 | + actual_output, _ = extractor.extract(HtmlPage(None, {}, page)) |
| 1301 | + |
| 1302 | + self.assertEqual(expected_output, actual_output and actual_output[0]) |
0 commit comments