Skip to content

Commit 75ceff0

Browse files
New basic convenience functions
1 parent b6b84f4 commit 75ceff0

File tree

8 files changed

+67
-0
lines changed

8 files changed

+67
-0
lines changed

include/fenix.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,9 @@ Fenix_Rank_role Fenix_get_role();
300300
//!@brief Returns the error value from Fenix_Init or the latest recovery
301301
int Fenix_get_error();
302302

303+
//!@brief Returns the number of spare ranks currently available to Fenix
304+
int Fenix_get_nspare();
305+
303306
/**
304307
* @brief Get the list of ranks that failed in the most recent failure.
305308
* @param[out] fail_list Set to a list of failed ranks.

include/fenix.hpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@
6060

6161
#include <mpi.h>
6262
#include <functional>
63+
#include <vector>
64+
#include <optional>
6365
#include "fenix.h"
6466
#include "fenix_exception.hpp"
6567
#include "fenix_data_subset.hpp"
@@ -116,12 +118,21 @@ Fenix_Rank_role role();
116118
//!@brief Overload of #Fenix_get_error
117119
int error();
118120

121+
//!@brief Overload of #Fenix_get_nspare
122+
int nspare();
123+
119124
//!@brief Overload of #Fenix_Callback_register
120125
int callback_register(std::function<void(MPI_Comm, int)> callback);
121126

122127
//!@brief Overload of #Fenix_Callback_pop
123128
int callback_pop();
124129

130+
/**
131+
* @brief Get the failed ranks from the most recent recovery
132+
* @return vector of failed ranks
133+
*/
134+
std::vector<int> fail_list();
135+
125136
//!@brief Overload of #Fenix_Process_detect_failures
126137
int detect_failures(bool recover = true);
127138

@@ -183,6 +194,18 @@ int commit(int group_id, int* time_stamp = nullptr);
183194
//!@brief Overload of #Fenix_Data_commit
184195
int commit_barrier(int group_id, int* time_stamp = nullptr);
185196

197+
/**
198+
* @brief get the members of a group
199+
* @return vector of member IDs of each member in group_id if group exists
200+
*/
201+
std::optional<std::vector<int>> group_members(int group_id);
202+
203+
/**
204+
* @brief get the snapshots of a group
205+
* @return vector of timestamps of each snapshot in group_id if group exists
206+
*/
207+
std::optional<std::vector<int>> group_snapshots(int group_id);
208+
186209
//!@brief Overload of #Fenix_Data_snapshot_delete
187210
int snapshot_delete(int group_id, int timestamp);
188211

include/fenix_data_group.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ struct fenix_group_t {
8484
int policy_name;
8585
std::map<int, fenix_member_entry_t> members;
8686

87+
std::vector<int> get_member_ids();
8788
//Search for id, returning {-1, nullptr} if not found.
8889
Fenix::Data::member_iterator search_member(int id);
8990
//As search_member, but print an error message if id not found.
@@ -105,6 +106,7 @@ struct fenix_group_t {
105106
virtual int member_restore_from_rank(int member_id, void* target_bugger, int max, int timestamp, int source_rank) = 0;
106107
virtual int get_number_of_snapshots(int* num) = 0;
107108
virtual int get_snapshot_at_position(int position, int* timestamp) = 0;
109+
virtual std::vector<int> get_snapshots() = 0;
108110
virtual int reinit(int* flag) = 0;
109111
virtual int member_get_attribute(fenix_member_entry_t* mentry, int name, void* value, int* flag, int sourcerank) = 0;
110112
virtual int member_set_attribute(fenix_member_entry_t* mentry, int name, void* value, int* flag) = 0;

include/fenix_data_policy_in_memory_raid.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ struct Group : public fenix_group_t {
226226

227227
int get_number_of_snapshots(int* number_of_snapshots) override;
228228
int get_snapshot_at_position(int position, int* timestamp) override;
229+
std::vector<int> get_snapshots();
229230

230231
int reinit(int* flag) override;
231232
};

src/fenix.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,10 @@ int Fenix_get_error(){
261261
return error();
262262
}
263263

264+
int Fenix_get_nspare(){
265+
return nspare();
266+
}
267+
264268
namespace Fenix {
265269

266270
void init(const Args::FenixInitArgs args){
@@ -282,6 +286,10 @@ int error(){
282286
return fenix.repair_result;
283287
}
284288

289+
int nspare(){
290+
return fenix.spare_ranks;
291+
}
292+
285293
//Hands `callback` to __fenix_callback_register and returns its status code.
int callback_register(std::function<void(MPI_Comm, int)> callback)
{
    const int status = __fenix_callback_register(callback);
    return status;
}
@@ -290,6 +298,11 @@ int callback_pop() {
290298
return __fenix_callback_pop();
291299
}
292300

301+
std::vector<int> fail_list(){
302+
if(fenix.fail_world_size == 0) return {};
303+
return {fenix.fail_world, fenix.fail_world+fenix.fail_world_size};
304+
}
305+
293306
int detect_failures(bool recover){
294307
return __fenix_detect_failures(recover);
295308
}

src/fenix_data_group.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,15 @@ member_iterator fenix_group_t::find_member(int id){
9292
return it;
9393
}
9494

95+
//Collects the key of every entry in `members`. Because `members` is a
//std::map, the ids come out in ascending order.
std::vector<int> fenix_group_t::get_member_ids()
{
    std::vector<int> ids;
    ids.reserve(members.size());
    for(auto it = members.begin(); it != members.end(); ++it){
        ids.push_back(it->first);
    }
    return ids;
}
103+
95104
fenix_data_recovery_t * __fenix_data_recovery_init() {
96105
fenix_data_recovery_t *data_recovery = (fenix_data_recovery_t *)
97106
s_calloc(1, sizeof(fenix_data_recovery_t));

src/fenix_data_policy_in_memory_raid.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -796,6 +796,10 @@ int Group::get_snapshot_at_position(int idx, int* snapshot){
796796
return FENIX_SUCCESS;
797797
}
798798

799+
std::vector<int> Group::get_snapshots(){
800+
return {timestamps.begin(), timestamps.end()};
801+
}
802+
799803
int Group::member_restore(
800804
int member_id, void* target_buffer, int max_count, int ts,
801805
DataSubset& data_found

src/fenix_data_recovery.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,18 @@ int __fenix_get_member_at_position(int group_id, int *member_id, int position) {
556556
return FENIX_SUCCESS;
557557
}
558558

559+
std::optional<std::vector<int>> group_members(int group_id){
560+
auto [group_index, group] = find_group(group_id);
561+
if(!group) return {};
562+
return group->get_member_ids();
563+
}
564+
565+
std::optional<std::vector<int>> group_snapshots(int group_id){
566+
auto [group_index, group] = find_group(group_id);
567+
if(!group) return {};
568+
return group->get_snapshots();
569+
}
570+
559571
/**
560572
* @brief
561573
* @param group_id

0 commit comments

Comments
 (0)