Skip to content

Commit 96ca55e

Browse files
Add Fenix::CommException
1 parent 3e0bcb1 commit 96ca55e

File tree

7 files changed

+218
-0
lines changed

7 files changed

+218
-0
lines changed

include/fenix.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include <mpi.h>
6262
#include <functional>
6363
#include "fenix.h"
64+
#include "fenix_exception.hpp"
6465

6566
/**
6667
* @brief As the C-style callback, but accepts an std::function and does not use the void* pointer.
@@ -71,4 +72,14 @@
7172
*/
7273
int Fenix_Callback_register(std::function<void(MPI_Comm, int)> callback);
7374

75+
/**
76+
* @brief Registers a callback that throws a CommException
77+
*
78+
* This means no longjmp will occur, and instead applications
79+
* will continue from their try-catch error handler.
80+
*
81+
* @returnstatus
82+
*
* NOTE(review): the definition in src/fenix_exception.cpp is placed inside
* namespace Fenix, and the exception_throw test calls it as
* Fenix::register_exception_callback(). The enclosing scope of this
* declaration is not visible here -- confirm it is also declared inside
* namespace Fenix, otherwise the qualified call has no matching declaration.
*/
83+
int register_exception_callback();
84+
7485
#endif

include/fenix_exception.hpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
//@HEADER
3+
// ************************************************************************
4+
//
5+
//
6+
// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _|
7+
// _| _| _|_| _| _| _| _|
8+
// _|_|_| _|_|_| _| _| _| _| _|
9+
// _| _| _| _|_| _| _| _|
10+
// _| _|_|_|_| _| _| _|_|_| _| _|
11+
//
12+
//
13+
//
14+
//
15+
// Copyright (C) 2016 Rutgers University and Sandia Corporation
16+
//
17+
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
18+
// the U.S. Government retains certain rights in this software.
19+
//
20+
// Redistribution and use in source and binary forms, with or without
21+
// modification, are permitted provided that the following conditions are
22+
// met:
23+
//
24+
// 1. Redistributions of source code must retain the above copyright
25+
// notice, this list of conditions and the following disclaimer.
26+
//
27+
// 2. Redistributions in binary form must reproduce the above copyright
28+
// notice, this list of conditions and the following disclaimer in the
29+
// documentation and/or other materials provided with the distribution.
30+
//
31+
// 3. Neither the name of the Corporation nor the names of the
32+
// contributors may be used to endorse or promote products derived from
33+
// this software without specific prior written permission.
34+
//
35+
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
36+
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38+
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
39+
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
40+
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
41+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
42+
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
43+
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44+
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45+
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46+
//
47+
// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar,
48+
// Rob Van der Wijngaart, Michael Heroux, and Matthew Whitlock
49+
//
50+
// Questions? Contact Keita Teranishi (knteran@sandia.gov) and
51+
// Marc Gamell (mgamell@cac.rutgers.edu)
52+
//
53+
// ************************************************************************
54+
//@HEADER
55+
*/
56+
57+
#ifndef FENIX_EXCEPTION_HPP
58+
#define FENIX_EXCEPTION_HPP
59+
60+
#include <mpi.h>
61+
#include <exception>
62+
63+
namespace Fenix {
64+
65+
struct CommException : public std::exception {
66+
MPI_Comm repaired_comm;
67+
const int fenix_err;
68+
CommException(MPI_Comm comm, int err) :
69+
repaired_comm(comm), fenix_err(err) { };
70+
};
71+
72+
}
73+
74+
#endif

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ FILE(GLOB Fenix_HEADERS ${CMAKE_SOURCE_DIR}/include/*.h*)
1616

1717
set (Fenix_SOURCES
1818
fenix.cpp
19+
fenix_exception.cpp
1920
fenix_opt.cpp
2021
fenix_process_recovery.cpp
2122
fenix_util.cpp

src/fenix_exception.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#include "fenix_exception.hpp"
2+
#include "fenix.h"
3+
4+
namespace Fenix {
5+
int register_exception_callback(){
6+
return Fenix_Callback_register(
7+
[](MPI_Comm repaired_comm, int fen_err){
8+
throw CommException(repaired_comm, fen_err);
9+
}
10+
);
11+
}
12+
}

test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ add_subdirectory(request_cancelled)
55
add_subdirectory(no_jump)
66
add_subdirectory(issend)
77
add_subdirectory(failed_spares)
8+
add_subdirectory(exception_throw)
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#
2+
# This file is part of Fenix
3+
# Copyright (c) 2016 Rutgers University and Sandia Corporation.
4+
# This software is distributed under the BSD License.
5+
# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
6+
# the U.S. Government retains certain rights in this software.
7+
# For more information, see the LICENSE file in the top Fenix
8+
# directory.
9+
#
10+
11+
# Build the exception test program from fenix_exceptions.cpp and link it
# against the fenix library and the MPI C++ imported target.
add_executable(fenix_exceptions fenix_exceptions.cpp)
12+
target_link_libraries(fenix_exceptions fenix MPI::MPI_CXX)
13+
14+
# Register the test with CTest; it is launched through the MPI launcher
# with 6 processes.
add_test(NAME exception_throw
15+
COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 6 ${MPIEXEC_PREFLAGS} fenix_exceptions ${MPIEXEC_POSTFLAGS})
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
//@HEADER
3+
// ************************************************************************
4+
//
5+
//
6+
// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _|
7+
// _| _| _|_| _| _| _| _|
8+
// _|_|_| _|_|_| _| _| _| _| _|
9+
// _| _| _| _|_| _| _| _|
10+
// _| _|_|_|_| _| _| _|_|_| _| _|
11+
//
12+
//
13+
//
14+
//
15+
// Copyright (C) 2016 Rutgers University and Sandia Corporation
16+
//
17+
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
18+
// the U.S. Government retains certain rights in this software.
19+
//
20+
// Redistribution and use in source and binary forms, with or without
21+
// modification, are permitted provided that the following conditions are
22+
// met:
23+
//
24+
// 1. Redistributions of source code must retain the above copyright
25+
// notice, this list of conditions and the following disclaimer.
26+
//
27+
// 2. Redistributions in binary form must reproduce the above copyright
28+
// notice, this list of conditions and the following disclaimer in the
29+
// documentation and/or other materials provided with the distribution.
30+
//
31+
// 3. Neither the name of the Corporation nor the names of the
32+
// contributors may be used to endorse or promote products derived from
33+
// this software without specific prior written permission.
34+
//
35+
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
36+
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38+
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
39+
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
40+
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
41+
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
42+
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
43+
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
44+
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
45+
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
46+
//
47+
// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar,
48+
// Michael Heroux, and Matthew Whitlock
49+
//
50+
// Questions? Contact Keita Teranishi (knteran@sandia.gov) and
51+
// Marc Gamell (mgamell@cac.rutgers.edu)
52+
//
53+
// ************************************************************************
54+
//@HEADER
55+
*/
56+
57+
#include <mpi.h>
58+
59+
#include <fenix.h>
60+
#include <stdio.h>
61+
#include <signal.h>
62+
#include <sys/types.h>
63+
#include <unistd.h>
64+
#include <pthread.h>
65+
66+
int main(int argc, char **argv) {
67+
volatile int status = 0;
68+
69+
MPI_Init(&argc, &argv);
70+
71+
int fenix_role, error;
72+
MPI_Comm res_comm;
73+
MPI_Info info;
74+
MPI_Info_create(&info);
75+
MPI_Info_set(info, "FENIX_RESUME_MODE", "NO_JUMP");
76+
MPI_Info_set(info, "FENIX_UNHANDLED_MODE", "NO_JUMP");
77+
Fenix_Init(&fenix_role, MPI_COMM_WORLD, &res_comm, &argc, &argv, 0, 0, info, &error);
78+
79+
Fenix::register_exception_callback();
80+
81+
if(fenix_role == FENIX_ROLE_SURVIVOR_RANK){
82+
printf("FAILURE: longjmp instead of exception\n");
83+
status = 1;
84+
}
85+
86+
if (fenix_role == FENIX_ROLE_INITIAL_RANK) {
87+
int rank;
88+
MPI_Comm_rank(res_comm, &rank);
89+
if(rank == 1) raise(SIGKILL);
90+
91+
try {
92+
MPI_Barrier(res_comm);
93+
printf("FAILURE: barrier finished without fault\n");
94+
status = 1;
95+
} catch (Fenix::CommException e){
96+
printf("SUCCESS: caught CommException\n");
97+
}
98+
}
99+
100+
Fenix_Finalize();
101+
MPI_Finalize();
102+
103+
return status;
104+
}

0 commit comments

Comments
 (0)