Skip to content

Commit 2fcdd59

Browse files
Update data subsets and policies to c++, implement storev and resizeable members
1 parent ed9766d commit 2fcdd59

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+3509
-2304
lines changed

examples/01_hello_world/fenix/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
add_executable(fenix_hello_world fenix_hello_world.c)
1212
target_link_libraries(fenix_hello_world fenix ${MPI_C_LIBRARIES})
13+
set_target_properties(fenix_hello_world PROPERTIES LINKER_LANGUAGE C)
1314

1415
if(BUILD_TESTING)
1516
add_test(NAME hello_world

examples/01_hello_world/fenix/fenix_hello_world.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include <mpi.h>
5959
#include <stdio.h>
6060
#include <signal.h>
61+
#include <stdlib.h>
6162
#include <sys/types.h>
6263
#include <unistd.h>
6364

examples/02_send_recv/fenix/CMakeLists.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99
#
1010

1111
add_executable(fenix_ring fenix_ring.c)
12-
target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES} m )
12+
target_link_libraries(fenix_ring fenix ${MPI_C_LIBRARIES})
13+
set_target_properties(fenix_ring PROPERTIES LINKER_LANGUAGE C)
1314

1415
if(BUILD_TESTING)
15-
add_test(NAME ring
16+
add_test(NAME send_recv
1617
COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS} fenix_ring ${MPIEXEC_POSTFLAGS} 1 2)
17-
set_tests_properties(ring PROPERTIES
18+
set_tests_properties(send_recv PROPERTIES
1819
FAIL_REGULAR_EXPRESSION "FAILURE")
1920
endif()
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
/*
2+
//@HEADER
3+
// ************************************************************************
4+
//
5+
//
6+
// _|_|_|_| _|_|_|_| _| _| _|_|_| _| _|
7+
// _| _| _|_| _| _| _| _|
8+
// _|_|_| _|_|_| _| _| _| _| _|
9+
// _| _| _| _|_| _| _| _|
10+
// _| _|_|_|_| _| _| _|_|_| _| _|
11+
//
12+
//
13+
//
14+
//
15+
// Copyright (C) 2016 Rutgers University and Sandia Corporation
16+
//
17+
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
18+
// the U.S. Government retains certain rights in this software.
19+
//
20+
// Redistribution and use in source and binary forms, with or without
21+
// modification, are permitted provided that the following conditions are
22+
// met:
23+
//
24+
// 1. Redistributions of source code must retain the above copyright
25+
// notice, this list of conditions and the following disclaimer.
26+
//
27+
// 2. Redistributions in binary form must reproduce the above copyright
28+
// notice, this list of conditions and the following disclaimer in the
29+
// documentation and/or other materials provided with the distribution.
30+
//
31+
// 3. Neither the name of the Corporation nor the names of the
32+
// contributors may be used to endorse or promote products derived from
33+
// this software without specific prior written permission.
34+
//
35+
// THIS SOFTWARE IS PROVIDED BY RUTGERS UNIVERSITY and SANDIA CORPORATION
36+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
37+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
38+
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL RUTGERS
39+
// UNIVERSITY, SANDIA CORPORATION OR THE CONTRIBUTORS BE LIABLE FOR ANY
40+
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
41+
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
42+
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
43+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
44+
// IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
45+
// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
46+
// IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47+
//
48+
// Author Marc Gamell, Eric Valenzuela, Keita Teranishi, Manish Parashar,
49+
// Michael Heroux, and Matthew Whitlock
50+
//
51+
// Questions? Contact Keita Teranishi (knteran@sandia.gov) and
52+
// Marc Gamell (mgamell@cac.rutgers.edu)
53+
//
54+
// ************************************************************************
55+
//@HEADER
56+
*/
57+
58+
#include <mpi.h>
59+
#include <mpi-ext.h>
60+
#include <stdio.h>
61+
#include <signal.h>
62+
63+
int main(int argc, char **argv) {
64+
MPI_Init(&argc, &argv);
65+
66+
MPI_Comm world;
67+
MPI_Comm_dup(MPI_COMM_WORLD, &world);
68+
69+
MPI_Comm_set_errhandler(world, MPI_ERRORS_RETURN);
70+
71+
int rank;
72+
MPI_Comm_rank(world, &rank);
73+
74+
if(rank == 2){
75+
MPI_Barrier(world);
76+
fprintf(stderr, "Rank %d raising SIGKILL\n", rank);
77+
raise(SIGKILL);
78+
} else {
79+
int ret;
80+
while((ret = MPI_Barrier(world)) == MPI_SUCCESS) { }
81+
fprintf(stderr, "Rank %d caught return %d\n", rank, ret);
82+
}
83+
84+
int whatever = 0;
85+
MPIX_Comm_agree(world, &whatever);
86+
fprintf(stderr, "Rank %d agreed to %d\n", rank, whatever);
87+
88+
MPI_Comm new_world;
89+
MPIX_Comm_shrink(world, &new_world);
90+
91+
MPI_Barrier(new_world);
92+
93+
fprintf(stderr, "Rank %d completed\n", rank);
94+
return 0;
95+
}

examples/05_subset_create/subset_create.c

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,12 @@
6666
int max_iter = 2;
6767
const int kCount = 100;
6868
const int kKillID = 2;
69+
const int my_group = 0;
70+
const int my_member = 0;
6971

7072
int main(int argc, char **argv) {
71-
fprintf(stderr, "Started\n");
7273
int i;
73-
int subset[500];
74+
int subset[kCount];
7475
MPI_Status status;
7576

7677
if (argc < 2) {
@@ -86,7 +87,6 @@ fprintf(stderr, "Started\n");
8687
int num_ranks;
8788
int rank;
8889
int error;
89-
int my_group = 0;
9090
int my_timestamp = 0;
9191
int my_depth = 1;
9292
int recovered = 0;
@@ -120,24 +120,33 @@ fprintf(stderr, "Started\n");
120120

121121
if (fenix_role == FENIX_ROLE_INITIAL_RANK) {
122122
// init my subset data
123-
int index;
124-
for (index = 0; index < kCount; index++) {
123+
for (int index = 0; index < kCount; index++) {
125124
subset[index] = -1;
126125
}
127126

128-
Fenix_Data_member_create(my_group, 777, subset, kCount, MPI_INT);
127+
Fenix_Data_member_create(my_group, my_member, subset, kCount, MPI_INT);
129128

130129
//Store the entire data set for the initial commit. This is not a requirement.
131-
Fenix_Data_member_store(my_group, 777, FENIX_DATA_SUBSET_FULL);
130+
Fenix_Data_member_store(my_group, my_member, FENIX_DATA_SUBSET_FULL);
132131
Fenix_Data_commit_barrier(my_group, NULL);
133132

134133
} else {
135134
//We've had a failure! Time to recover data.
136-
fprintf(stderr, "Starting data recovery on node %d\n", rank);
137-
Fenix_Data_member_restore(my_group, 777, subset, kCount, FENIX_TIME_STAMP_MAX, NULL);
135+
fprintf(stderr, "Starting data recovery on rank %d\n", rank);
136+
137+
//Set all data to a value that was never stored
138+
for (int index = 0; index < kCount; index++) {
139+
subset[index] = -2;
140+
}
141+
142+
int restore_ret = Fenix_Data_member_restore(my_group, my_member, subset, kCount, FENIX_TIME_STAMP_MAX, NULL);
143+
144+
if(restore_ret != FENIX_SUCCESS){
145+
fprintf(stderr, "Rank %d restore failure w/ code %d\n", rank, restore_ret);
146+
}
138147

139148
int out_flag;
140-
Fenix_Data_member_attr_set(my_group, 777, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER,
149+
Fenix_Data_member_attr_set(my_group, my_member, FENIX_DATA_MEMBER_ATTRIBUTE_BUFFER,
141150
subset, &out_flag);
142151

143152

@@ -159,20 +168,19 @@ fprintf(stderr, "Started\n");
159168
//We'll store only the small subset that we specified, though.
160169
//This means that as far as Fenix is concerned only data within that
161170
//subset was ever changed from the initialized value of -1
162-
Fenix_Data_member_store(my_group, 777, subset_specifier);
171+
Fenix_Data_member_store(my_group, my_member, subset_specifier);
163172
Fenix_Data_commit_barrier(my_group, NULL);
164173

165174
MPI_Barrier(new_comm); //Make sure everyone is done committing before we kill and restart everyone
166175
//else we may end up with only some nodes having the commit, and it being unusable
167-
168176
}
169177

170178

171179
//Kill a rank to test that we can recover from the commits we've made.
172180
if (rank == kKillID && recovered == 0) {
173181
fprintf(stderr, "Doing kill on node %d\n", rank);
174182
pid_t pid = getpid();
175-
kill(pid, SIGTERM);
183+
kill(pid, SIGKILL);
176184
}
177185

178186
//Make sure we've let rank 2 fail before proceeding, so we're definitely checking
@@ -214,6 +222,6 @@ fprintf(stderr, "Started\n");
214222

215223

216224
Fenix_Finalize();
217-
MPI_Finalize();
225+
//MPI_Finalize();
218226
return !successful; //return error status
219227
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#
2+
# This file is part of Fenix
3+
# Copyright (c) 2016 Rutgers University and Sandia Corporation.
4+
# This software is distributed under the BSD License.
5+
# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
6+
# the U.S. Government retains certain rights in this software.
7+
# For more information, see the LICENSE file in the top Fenix
8+
# directory.
9+
#
10+
11+
# Build the `resizeable` example from resizeable.cpp; it is compiled as C++20.
add_executable(resizeable resizeable.cpp)
target_link_libraries(resizeable PRIVATE fenix ${MPI_C_LIBRARIES})

target_compile_features(resizeable PRIVATE cxx_std_20)

if(BUILD_TESTING)
  # The executable is not the first COMMAND argument (mpiexec is), so CMake
  # does not substitute the target name with its built location on its own.
  # $<TARGET_FILE:...> passes the real path regardless of generator or
  # working directory.
  add_test(
    NAME resizeable
    COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 5 ${MPIEXEC_PREFLAGS}
            $<TARGET_FILE:resizeable> ${MPIEXEC_POSTFLAGS} 1)
  # Any "FAILURE" in the output marks the test as failed.
  set_tests_properties(resizeable PROPERTIES
    FAIL_REGULAR_EXPRESSION "FAILURE"
    LABELS "Example")
endif()

0 commit comments

Comments
 (0)