Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit 4d2893d

Browse files
author
Nicolas Cornu
authored
Add a function to delete gap indices from device (#730)
Fix #727
1 parent 423ae6c commit 4d2893d

File tree

3 files changed

+50
-31
lines changed

3 files changed

+50
-31
lines changed

coreneuron/apps/main1.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,10 @@ void nrn_init_and_load_data(int argc,
352352
allocate_data_in_mechanism_nrn_init();
353353
}
354354

355-
if (nrn_have_gaps) {
356-
nrn_partrans::gap_update_indices();
355+
if (corenrn_param.gpu) {
356+
if (nrn_have_gaps) {
357+
nrn_partrans::copy_gap_indices_to_device();
358+
}
357359
}
358360

359361
// call prcellstate for prcellgid
@@ -676,6 +678,9 @@ extern "C" int run_solve_core(int argc, char** argv) {
676678
// cleanup threads on GPU
677679
if (corenrn_param.gpu) {
678680
delete_nrnthreads_on_device(nrn_threads, nrn_nthread);
681+
if (nrn_have_gaps) {
682+
nrn_partrans::delete_gap_indices_from_device();
683+
}
679684
}
680685

681686
// Cleaning the memory

coreneuron/network/partrans.cpp

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -129,45 +129,58 @@ void nrnthread_v_transfer(NrnThread* _nt) {
129129
}
130130
}
131131

132-
/// TODO: Corresponding exit data clause for OpenACC/OpenMP is missing and hence
133-
/// GPU buffers are not freed.
134-
void nrn_partrans::gap_update_indices() {
132+
void nrn_partrans::copy_gap_indices_to_device() {
135133
// Ensure index vectors, src_gather, and insrc_buf_ are on the gpu.
136134
if (insrcdspl_) {
137135
int n_insrc_buf = insrcdspl_[nrnmpi_numprocs];
138-
nrn_pragma_acc(enter data create(insrc_buf_ [0:n_insrc_buf]) if (corenrn_param.gpu))
136+
nrn_pragma_acc(enter data create(insrc_buf_[:n_insrc_buf]))
139137
// clang-format off
140-
nrn_pragma_omp(target enter data map(alloc: insrc_buf_[0:n_insrc_buf])
141-
if(corenrn_param.gpu))
138+
nrn_pragma_omp(target enter data map(alloc: insrc_buf_[:n_insrc_buf]))
142139
// clang-format on
143140
}
144141
for (int tid = 0; tid < nrn_nthread; ++tid) {
145-
TransferThreadData& ttd = transfer_thread_data_[tid];
142+
const NrnThread* nt = nrn_threads + tid;
143+
if (!nt->compute_gpu) {
144+
continue;
145+
}
146146

147-
size_t n_src_indices = ttd.src_indices.size();
148-
size_t n_src_gather = ttd.src_gather.size();
149-
NrnThread* nt = nrn_threads + tid;
150-
if (n_src_indices) {
151-
int* src_indices = ttd.src_indices.data();
152-
double* src_gather = ttd.src_gather.data();
153-
nrn_pragma_acc(enter data copyin(src_indices[0:n_src_indices]) if(nt->compute_gpu))
154-
nrn_pragma_acc(enter data create(src_gather[0:n_src_gather]) if(nt->compute_gpu))
155-
// clang-format off
156-
nrn_pragma_omp(target enter data map(to: src_indices [0:n_src_indices])
157-
map(alloc: src_gather[0:n_src_gather])
158-
if(nt->compute_gpu))
159-
// clang-format on
147+
const TransferThreadData& ttd = transfer_thread_data_[tid];
148+
149+
if (!ttd.src_indices.empty()) {
150+
cnrn_target_copyin(ttd.src_indices.data(), ttd.src_indices.size());
151+
152+
size_t n_src_gather = ttd.src_gather.size();
153+
const double* src_gather = ttd.src_gather.data();
154+
nrn_pragma_acc(enter data create(src_gather[:n_src_gather]))
155+
nrn_pragma_omp(target enter data map(alloc: src_gather[:n_src_gather]))
160156
}
161157

162158
if (ttd.insrc_indices.size()) {
163-
int* insrc_indices = ttd.insrc_indices.data();
164-
size_t n_insrc_indices = ttd.insrc_indices.size();
165-
nrn_pragma_acc(
166-
enter data copyin(insrc_indices [0:n_insrc_indices]) if (nt->compute_gpu))
167-
// clang-format off
168-
nrn_pragma_omp(target enter data map(to: insrc_indices[0:n_insrc_indices])
169-
if(nt->compute_gpu))
170-
// clang-format on
159+
cnrn_target_copyin(ttd.insrc_indices.data(), ttd.insrc_indices.size());
160+
}
161+
}
162+
}
163+
164+
void nrn_partrans::delete_gap_indices_from_device() {
165+
if (insrcdspl_) {
166+
int n_insrc_buf = insrcdspl_[nrnmpi_numprocs];
167+
cnrn_target_delete(insrc_buf_, n_insrc_buf);
168+
}
169+
for (int tid = 0; tid < nrn_nthread; ++tid) {
170+
const NrnThread* nt = nrn_threads + tid;
171+
if (!nt->compute_gpu) {
172+
continue;
173+
}
174+
175+
TransferThreadData& ttd = transfer_thread_data_[tid];
176+
177+
if (!ttd.src_indices.empty()) {
178+
cnrn_target_delete(ttd.src_indices.data(), ttd.src_indices.size());
179+
cnrn_target_delete(ttd.src_gather.data(), ttd.src_gather.size());
180+
}
181+
182+
if (!ttd.insrc_indices.empty()) {
183+
cnrn_target_delete(ttd.insrc_indices.data(), ttd.insrc_indices.size());
171184
}
172185
}
173186
}

coreneuron/network/partrans.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,8 @@ extern SetupTransferInfo* setup_info_; /* array for threads exists only during s
107107

108108
extern void gap_mpi_setup(int ngroup);
109109
extern void gap_data_indices_setup(NrnThread* nt);
110-
extern void gap_update_indices();
110+
extern void copy_gap_indices_to_device();
111+
extern void delete_gap_indices_from_device();
111112
extern void gap_cleanup();
112113

113114
extern double* insrc_buf_; // Receive buffer for gap voltages

0 commit comments

Comments
 (0)