Skip to content

Commit 3ec86eb

Browse files
committed
Generate unittests instead of amassing them in one test
1 parent 3c53a53 commit 3ec86eb

File tree

1 file changed

+32
-50
lines changed

1 file changed

+32
-50
lines changed

scrapely/tests/test_extraction.py

Lines changed: 32 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44
Page parsing effectiveness is measured through the evaluation system. These
55
tests should focus on checking that specific bits of functionality work correctly.
66
"""
7-
from unittest import TestCase
8-
import numpy
7+
from functools import partial
98

109
from scrapely.htmlpage import HtmlPage
1110
from scrapely.descriptor import (FieldDescriptor as A,
@@ -1289,51 +1288,34 @@
12891288
),
12901289
]
12911290

1292-
class TestIbl(TestCase):
1293-
1294-
def _run_extraction(self, name, templates, page, descriptor, expected_output):
1295-
self.trace = None
1296-
template_pages = [HtmlPage(None, {}, t) for t in templates]
1297-
# extracts with trace enabled in order to generate traceback
1298-
extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages], True)
1299-
actual_output, _ = extractor.extract(HtmlPage(None, {}, page))
1300-
if actual_output is not None:
1301-
actual_output = actual_output[0]
1302-
self.trace = ["Extractor:\n%s" % extractor] + actual_output.pop('trace')
1303-
# extracts again with trace disabled in order to get the pure output
1304-
extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages])
1305-
actual_output, _ = extractor.extract(HtmlPage(None, {}, page))
1306-
if actual_output is None:
1307-
if expected_output is None:
1308-
return
1309-
assert False, "failed to extract data for test '%s'" % name
1310-
else:
1311-
actual_output = actual_output[0]
1312-
expected_names = set(expected_output.keys())
1313-
actual_names = set(actual_output.keys())
1314-
1315-
missing_in_output = filter(None, expected_names - actual_names)
1316-
error = "attributes '%s' were expected but were not present in test '%s'" % \
1317-
("', '".join(missing_in_output), name)
1318-
assert len(missing_in_output) == 0, error
1319-
1320-
unexpected = actual_names - expected_names
1321-
error = "unexpected attributes %s in test '%s'" % \
1322-
(', '.join(unexpected), name)
1323-
assert len(unexpected) == 0, error
1324-
1325-
for k, v in expected_output.items():
1326-
extracted = actual_output[k]
1327-
assert v == extracted, "in test '%s' for attribute '%s', " \
1328-
"expected value '%s' but got '%s'" % (name, k, v, extracted)
1329-
1330-
def test_expected_outputs(self):
1331-
try:
1332-
for data in TEST_DATA:
1333-
self._run_extraction(*data)
1334-
except AssertionError:
1335-
if self.trace:
1336-
print "Trace:"
1337-
for line in self.trace:
1338-
print "\n---\n%s" % line
1339-
raise
1291+
def _run_extraction(name, templates, page, descriptor, expected_output):
1292+
template_pages = [HtmlPage(None, {}, t) for t in templates]
1293+
1294+
extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages])
1295+
actual_output, _ = extractor.extract(HtmlPage(None, {}, page))
1296+
if actual_output is None:
1297+
assert not expected_output, "failed to extract data for test '%s'" % name
1298+
return
1299+
else:
1300+
actual_output = actual_output[0]
1301+
expected_names = set(expected_output.keys())
1302+
actual_names = set(actual_output.keys())
1303+
1304+
missing_in_output = filter(None, expected_names - actual_names)
1305+
error = "attributes '%s' were expected but were not present in test '%s'" % \
1306+
("', '".join(missing_in_output), name)
1307+
assert not missing_in_output, error
1308+
1309+
unexpected = actual_names - expected_names
1310+
error = "unexpected attributes %s in test '%s'" % \
1311+
(', '.join(unexpected), name)
1312+
assert not unexpected, error
1313+
1314+
for k, v in expected_output.items():
1315+
extracted = actual_output[k]
1316+
assert v == extracted, "in test '%s' for attribute '%s', " \
1317+
"expected value '%s' but got '%s'" % (name, k, v, extracted)
1318+
1319+
def test_generator():
1320+
for data in TEST_DATA:
1321+
yield partial(_run_extraction, *data)

0 commit comments

Comments
 (0)