Generate a separate unit test for each test case instead of amassing all cases in one test

AlexRiina · AlexRiina · commit 3ec86eb3e70c · 2013-08-25T01:16:48.000-04:00
diff --git a/scrapely/tests/test_extraction.py b/scrapely/tests/test_extraction.py
@@ -4,8 +4,7 @@
 Page parsing effectiveness is measured through the evaluation system. These
 tests should focus on specific bits of functionality work correctly.
 """
-from unittest import TestCase
-import numpy
+from functools import partial
 
 from scrapely.htmlpage import HtmlPage
 from scrapely.descriptor import (FieldDescriptor as A, 
@@ -1289,51 +1288,34 @@
     ),
 ]
 
-class TestIbl(TestCase):
-
-    def _run_extraction(self, name, templates, page, descriptor, expected_output):
-        self.trace = None
-        template_pages = [HtmlPage(None, {}, t) for t in templates]
-        # extracts with trace enabled in order to generate traceback
-        extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages], True)
-        actual_output, _ = extractor.extract(HtmlPage(None, {}, page))
-        if actual_output is not None:
-            actual_output = actual_output[0]
-            self.trace = ["Extractor:\n%s" % extractor] + actual_output.pop('trace')
-        # extracts again with trace disabled in order to get the pure output
-        extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages])
-        actual_output, _ = extractor.extract(HtmlPage(None, {}, page))
-        if actual_output is None:
-            if expected_output is None:
-                return
-            assert False, "failed to extract data for test '%s'" % name
-        else:
-            actual_output = actual_output[0]
-        expected_names = set(expected_output.keys())
-        actual_names = set(actual_output.keys())
-        
-        missing_in_output = filter(None, expected_names - actual_names)
-        error = "attributes '%s' were expected but were not present in test '%s'" % \
-                ("', '".join(missing_in_output), name)
-        assert len(missing_in_output) == 0, error
-
-        unexpected = actual_names - expected_names
-        error = "unexpected attributes %s in test '%s'" % \
-                (', '.join(unexpected), name)
-        assert len(unexpected) == 0, error
-
-        for k, v in expected_output.items():
-            extracted = actual_output[k]
-            assert v == extracted, "in test '%s' for attribute '%s', " \
-                "expected value '%s' but got '%s'" % (name, k, v, extracted)
-
-    def test_expected_outputs(self):
-        try:
-            for data in TEST_DATA:
-                self._run_extraction(*data)
-        except AssertionError:
-            if self.trace:
-                print "Trace:"
-                for line in self.trace:
-                    print "\n---\n%s" % line
-            raise
+def _run_extraction(name, templates, page, descriptor, expected_output):
+    """Train on `templates`, extract from `page`, and assert the first item
+    matches `expected_output`; no extraction passes only if that is falsy."""
+    template_pages = [HtmlPage(None, {}, t) for t in templates]
+    extractor = InstanceBasedLearningExtractor([(t, descriptor) for t in template_pages])
+    actual_output, _ = extractor.extract(HtmlPage(None, {}, page))
+    if actual_output is None:
+        assert not expected_output, "failed to extract data for test '%s'" % name
+        return
+    # A None expectation with real output must fail cleanly, not raise on .keys().
+    assert expected_output is not None, "unexpected data extracted in test '%s'" % name
+    actual_output = actual_output[0]
+    expected_names = set(expected_output.keys())
+    actual_names = set(actual_output.keys())
+    # A list keeps the emptiness check valid where filter() returns an iterator.
+    missing_in_output = [n for n in expected_names - actual_names if n]
+    error = "attributes '%s' were expected but were not present in test '%s'" % \
+            ("', '".join(missing_in_output), name)
+    assert not missing_in_output, error
+    unexpected = actual_names - expected_names
+    error = "unexpected attributes %s in test '%s'" % \
+            (', '.join(unexpected), name)
+    assert not unexpected, error
+    for k, v in expected_output.items():
+        extracted = actual_output[k]
+        assert v == extracted, "in test '%s' for attribute '%s', " \
+            "expected value '%s' but got '%s'" % (name, k, v, extracted)
+
+def test_generator():
+    for case in TEST_DATA:  # bind each entry's fields into a zero-argument check
+        yield partial(_run_extraction, *case)