Skip to content

Commit fa5b550

Browse files
committed
TestCase has some nice comparison methods that, together with nose-parameterized, make the actual test code quite clear
1 parent 3ec86eb commit fa5b550

File tree

1 file changed

+13
-32
lines changed

1 file changed

+13
-32
lines changed

scrapely/tests/test_extraction.py

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
Page parsing effectiveness is measured through the evaluation system. These
55
tests should focus on checking that specific bits of functionality work correctly.
66
"""
7-
from functools import partial
7+
from unittest import TestCase
8+
from nose_parameterized import parameterized
89

910
from scrapely.htmlpage import HtmlPage
1011
from scrapely.descriptor import (FieldDescriptor as A,
@@ -1288,34 +1289,14 @@
12881289
),
12891290
]
12901291

1291-
def _run_extraction(name, templates, page, descriptor, expected_output):
1292-
template_pages = [HtmlPage(None, {}, t) for t in templates]
1293-
1294-
extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages])
1295-
actual_output, _ = extractor.extract(HtmlPage(None, {}, page))
1296-
if actual_output is None:
1297-
assert not expected_output, "failed to extract data for test '%s'" % name
1298-
return
1299-
else:
1300-
actual_output = actual_output[0]
1301-
expected_names = set(expected_output.keys())
1302-
actual_names = set(actual_output.keys())
1303-
1304-
missing_in_output = filter(None, expected_names - actual_names)
1305-
error = "attributes '%s' were expected but were not present in test '%s'" % \
1306-
("', '".join(missing_in_output), name)
1307-
assert not missing_in_output, error
1308-
1309-
unexpected = actual_names - expected_names
1310-
error = "unexpected attributes %s in test '%s'" % \
1311-
(', '.join(unexpected), name)
1312-
assert not unexpected, error
1313-
1314-
for k, v in expected_output.items():
1315-
extracted = actual_output[k]
1316-
assert v == extracted, "in test '%s' for attribute '%s', " \
1317-
"expected value '%s' but got '%s'" % (name, k, v, extracted)
1318-
1319-
def test_generator():
1320-
for data in TEST_DATA:
1321-
yield partial(_run_extraction, *data)
1292+
1293+
1294+
class TestExtraction(TestCase):
1295+
@parameterized.expand(TEST_DATA)
1296+
def test_extraction(self, name, templates, page, descriptor, expected_output):
1297+
template_pages = [HtmlPage(None, {}, t) for t in templates]
1298+
1299+
extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages])
1300+
actual_output, _ = extractor.extract(HtmlPage(None, {}, page))
1301+
1302+
self.assertEqual(expected_output, actual_output and actual_output[0])

0 commit comments

Comments
 (0)