Handle missing expected errors better

vidartf · vidartf · commit 98399813e21d · 2018-11-13T13:45:14.000+01:00
- Show a clearer error message when an expected error is missing during output comparison.
- Fail cell if an unrun cell with an expected error does not produce one.
diff --git a/nbval/plugin.py b/nbval/plugin.py
@@ -423,12 +423,22 @@ def compare_outputs(self, test, ref, skip_compare=None):
         test_keys = set(testing_outs.keys())
 
         if ref_keys - test_keys:
-            self.comparison_traceback.append(
-                cc.FAIL
-                + "Missing output fields from running code: %s"
-                % (ref_keys - test_keys)
-                + cc.ENDC
-            )
+            if ref_keys == {'evalue', 'ename'}:
+                self.comparison_traceback.append(
+                    cc.FAIL
+                    + "Expected error:\n  %s: %r" % (
+                        '\n'.join(reference_outs['ename']),
+                        '\n'.join(reference_outs['evalue'])
+                    )
+                    + cc.ENDC
+                )
+            else:
+                self.comparison_traceback.append(
+                    cc.FAIL
+                    + "Missing output fields from running code: %s"
+                    % (ref_keys - test_keys)
+                    + cc.ENDC
+                )
             return False
         elif test_keys - ref_keys:
             self.comparison_traceback.append(
@@ -577,6 +587,13 @@ def runtest(self):
         # TODO: Only store if comparing with nbdime, to save on memory usage
         self.test_outputs = outs
 
+        # Outputs are not checked for cells whose reference was not run:
+        unrun = self.cell.execution_count is None
+        if unrun and self.cell.outputs:
+            self.raise_cell_error('Unrun reference cell has outputs')
+
+        cell_has_error = False
+
         # Now get the outputs from the iopub channel
         while True:
             # The iopub channel broadcasts a range of messages. We keep reading
@@ -687,6 +704,7 @@ def runtest(self):
             # cell execution. Therefore raise a cell error and pass the
             # traceback information.
             elif msg_type == 'error':
+                cell_has_error = True
                 # Store error in output first
                 out['ename'] = reply['ename']
                 out['evalue'] = reply['evalue']
@@ -695,9 +713,9 @@ def runtest(self):
                 if not self.options['check_exception']:
                     # Ensure we flush iopub before raising error
                     try:
-                        self.parent.kernel.await_idle(msg_id, self.output_timeout)
+                        kernel.await_idle(msg_id, self.output_timeout)
                     except Empty:
-                        self.stop()
+                        kernel.stop()
                         raise RuntimeError('Timed out waiting for idle kernel!')
                     traceback = '\n' + '\n'.join(reply['traceback'])
                     if out['ename'] == 'KeyboardInterrupt' and self.parent.timed_out:
@@ -713,10 +731,11 @@ def runtest(self):
 
         outs[:] = coalesce_streams(outs)
 
-        # Cells where the reference is not run, will not check outputs:
-        unrun = self.cell.execution_count is None
-        if unrun and self.cell.outputs:
-            self.raise_cell_error('Unrun reference cell has outputs')
+        if self.options['check_exception'] and unrun and not cell_has_error:
+            # If unrun, we cannot rely on output comparison for checking errors
+            self.raise_cell_error(
+                "Expected error",
+                "Expected cell to produce an error, but none was produced!")
 
         # Compare if the outputs have the same number of lines
         # and throw an error if it fails
diff --git a/tests/test_expected_exceptions.py b/tests/test_expected_exceptions.py
@@ -0,0 +1,43 @@
+import os
+
+import nbformat
+import pytest
+
+from utils import build_nb
+
+
+pytest_plugins = "pytester"
+
+
+def test_unrun_raises(testdir):
+    # This test uses the testdir fixture from pytester, which is useful for
+    # testing pytest plugins. It writes a notebook to a temporary dir
+    # and then runs pytest.
+
+    # Setup notebook to test:
+    sources = [
+        # In [1]:
+        "raise ValueError('foo')",
+    ]
+    # Build unrun notebook:
+    nb = build_nb(sources, mark_run=False)
+
+    # Write notebook to test dir
+    nbformat.write(nb, os.path.join(
+        str(testdir.tmpdir), 'test_expcted_exceptions.ipynb'))
+
+    # Run tests
+    result = testdir.inline_run('--nbval', '--current-env', '-s')
+    reports = result.getreports('pytest_runtest_logreport')
+
+    # Setup and teardown of cells should have no issues:
+    setup_teardown = [r for r in reports if r.when != 'call']
+    for r in setup_teardown:
+        assert r.passed
+
+    reports = [r for r in reports if r.when == 'call']
+
+    assert len(reports) == 1
+
+    # The single cell should fail outright, not be reported as an xfail
+    assert reports[0].failed and not hasattr(reports[0], 'wasxfail')