|
5 | 5 | tests should focus on checking that specific bits of functionality work correctly. |
6 | 6 | """ |
7 | 7 | from unittest import TestCase |
8 | | -import numpy |
| 8 | +from nose_parameterized import parameterized |
9 | 9 |
|
10 | 10 | from scrapely.htmlpage import HtmlPage |
11 | 11 | from scrapely.descriptor import (FieldDescriptor as A, |
|
1289 | 1289 | ), |
1290 | 1290 | ] |
1291 | 1291 |
|
1292 | | -class TestIbl(TestCase): |
1293 | 1292 |
|
1294 | | - def _run_extraction(self, name, templates, page, descriptor, expected_output): |
1295 | | - self.trace = None |
| 1293 | + |
| 1294 | +class TestExtraction(TestCase): |
| 1295 | + @parameterized.expand(TEST_DATA) |
| 1296 | + def test_extraction(self, name, templates, page, descriptor, expected_output): |
1296 | 1297 | template_pages = [HtmlPage(None, {}, t) for t in templates] |
1297 | | - # extracts with trace enabled in order to generate traceback |
1298 | | - extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages], True) |
1299 | | - actual_output, _ = extractor.extract(HtmlPage(None, {}, page)) |
1300 | | - if actual_output is not None: |
1301 | | - actual_output = actual_output[0] |
1302 | | - self.trace = ["Extractor:\n%s" % extractor] + actual_output.pop('trace') |
1303 | | - # extracts again with trace disabled in order to get the pure output |
| 1298 | + |
1304 | 1299 | extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages]) |
1305 | 1300 | actual_output, _ = extractor.extract(HtmlPage(None, {}, page)) |
1306 | | - if actual_output is None: |
1307 | | - if expected_output is None: |
1308 | | - return |
1309 | | - assert False, "failed to extract data for test '%s'" % name |
1310 | | - else: |
1311 | | - actual_output = actual_output[0] |
1312 | | - expected_names = set(expected_output.keys()) |
1313 | | - actual_names = set(actual_output.keys()) |
1314 | | - |
1315 | | - missing_in_output = filter(None, expected_names - actual_names) |
1316 | | - error = "attributes '%s' were expected but were not present in test '%s'" % \ |
1317 | | - ("', '".join(missing_in_output), name) |
1318 | | - assert len(missing_in_output) == 0, error |
1319 | | - |
1320 | | - unexpected = actual_names - expected_names |
1321 | | - error = "unexpected attributes %s in test '%s'" % \ |
1322 | | - (', '.join(unexpected), name) |
1323 | | - assert len(unexpected) == 0, error |
1324 | | - |
1325 | | - for k, v in expected_output.items(): |
1326 | | - extracted = actual_output[k] |
1327 | | - assert v == extracted, "in test '%s' for attribute '%s', " \ |
1328 | | - "expected value '%s' but got '%s'" % (name, k, v, extracted) |
1329 | | - |
1330 | | - def test_expected_outputs(self): |
1331 | | - try: |
1332 | | - for data in TEST_DATA: |
1333 | | - self._run_extraction(*data) |
1334 | | - except AssertionError: |
1335 | | - if self.trace: |
1336 | | - print "Trace:" |
1337 | | - for line in self.trace: |
1338 | | - print "\n---\n%s" % line |
1339 | | - raise |
| 1301 | + |
| 1302 | + self.assertEqual(expected_output, actual_output and actual_output[0]) |
0 commit comments