@@ -423,12 +423,22 @@ def compare_outputs(self, test, ref, skip_compare=None):
423423 test_keys = set (testing_outs .keys ())
424424
425425 if ref_keys - test_keys :
426- self .comparison_traceback .append (
427- cc .FAIL
428- + "Missing output fields from running code: %s"
429- % (ref_keys - test_keys )
430- + cc .ENDC
431- )
426+ if ref_keys == {'evalue' , 'ename' }:
427+ self .comparison_traceback .append (
428+ cc .FAIL
429+ + "Expected error:\n %s: %r" % (
430+ '\n ' .join (reference_outs ['ename' ]),
431+ '\n ' .join (reference_outs ['evalue' ])
432+ )
433+ + cc .ENDC
434+ )
435+ else :
436+ self .comparison_traceback .append (
437+ cc .FAIL
438+ + "Missing output fields from running code: %s"
439+ % (ref_keys - test_keys )
440+ + cc .ENDC
441+ )
432442 return False
433443 elif test_keys - ref_keys :
434444 self .comparison_traceback .append (
@@ -577,6 +587,13 @@ def runtest(self):
577587 # TODO: Only store if comparing with nbdime, to save on memory usage
578588 self .test_outputs = outs
579589
590+ # Cells where the reference is not run, will not check outputs:
591+ unrun = self .cell .execution_count is None
592+ if unrun and self .cell .outputs :
593+ self .raise_cell_error ('Unrun reference cell has outputs' )
594+
595+ cell_has_error = False
596+
580597 # Now get the outputs from the iopub channel
581598 while True :
582599 # The iopub channel broadcasts a range of messages. We keep reading
@@ -687,6 +704,7 @@ def runtest(self):
687704 # cell execution. Therefore raise a cell error and pass the
688705 # traceback information.
689706 elif msg_type == 'error' :
707+ cell_has_error = True
690708 # Store error in output first
691709 out ['ename' ] = reply ['ename' ]
692710 out ['evalue' ] = reply ['evalue' ]
@@ -695,9 +713,9 @@ def runtest(self):
695713 if not self .options ['check_exception' ]:
696714 # Ensure we flush iopub before raising error
697715 try :
698- self . parent . kernel .await_idle (msg_id , self .output_timeout )
716+ kernel .await_idle (msg_id , self .output_timeout )
699717 except Empty :
700- self .stop ()
718+ kernel .stop ()
701719 raise RuntimeError ('Timed out waiting for idle kernel!' )
702720 traceback = '\n ' + '\n ' .join (reply ['traceback' ])
703721 if out ['ename' ] == 'KeyboardInterrupt' and self .parent .timed_out :
@@ -713,10 +731,11 @@ def runtest(self):
713731
714732 outs [:] = coalesce_streams (outs )
715733
716- # Cells where the reference is not run, will not check outputs:
717- unrun = self .cell .execution_count is None
718- if unrun and self .cell .outputs :
719- self .raise_cell_error ('Unrun reference cell has outputs' )
734+ if self .options ['check_exception' ] and unrun and not cell_has_error :
735+ # If unrun, we cannot rely on output comparison for checking errors
736+ self .raise_cell_error (
737+ "Expected error" ,
738+ "Expected cell to produce an error, but none was produced!" )
720739
721740 # Compare if the outputs have the same number of lines
722741 # and throw an error if it fails
0 commit comments