@@ -129,45 +129,58 @@ void nrnthread_v_transfer(NrnThread* _nt) {
129129 }
130130}
131131
132- // / TODO: Corresponding exit data cluase for OpenACC/OpenMP is missing and hence
133- // / GPU buffers are not freed.
134- void nrn_partrans::gap_update_indices () {
132+ void nrn_partrans::copy_gap_indices_to_device () {
135133 // Ensure index vectors, src_gather, and insrc_buf_ are on the gpu.
136134 if (insrcdspl_) {
137135 int n_insrc_buf = insrcdspl_[nrnmpi_numprocs];
138- nrn_pragma_acc (enter data create (insrc_buf_ [ 0 :n_insrc_buf]) if (corenrn_param. gpu ))
136+ nrn_pragma_acc (enter data create (insrc_buf_[ :n_insrc_buf]))
139137 // clang-format off
140- nrn_pragma_omp (target enter data map (alloc: insrc_buf_[0 :n_insrc_buf])
141- if (corenrn_param.gpu ))
138+ nrn_pragma_omp (target enter data map (alloc: insrc_buf_[:n_insrc_buf]))
142139 // clang-format off
143140 }
144141 for (int tid = 0 ; tid < nrn_nthread; ++tid) {
145- TransferThreadData& ttd = transfer_thread_data_[tid];
142+ const NrnThread* nt = nrn_threads + tid;
143+ if (!nt->compute_gpu ) {
144+ continue ;
145+ }
146146
147- size_t n_src_indices = ttd.src_indices .size ();
148- size_t n_src_gather = ttd.src_gather .size ();
149- NrnThread* nt = nrn_threads + tid;
150- if (n_src_indices) {
151- int * src_indices = ttd.src_indices .data ();
152- double * src_gather = ttd.src_gather .data ();
153- nrn_pragma_acc (enter data copyin (src_indices[0 :n_src_indices]) if (nt->compute_gpu ))
154- nrn_pragma_acc (enter data create (src_gather[0 :n_src_gather]) if (nt->compute_gpu ))
155- // clang-format off
156- nrn_pragma_omp (target enter data map (to: src_indices [0 :n_src_indices])
157- map (alloc: src_gather[0 :n_src_gather])
158- if (nt->compute_gpu ))
159- // clang-format on
147+ const TransferThreadData& ttd = transfer_thread_data_[tid];
148+
149+ if (!ttd.src_indices .empty ()) {
150+ cnrn_target_copyin (ttd.src_indices .data (), ttd.src_indices .size ());
151+
152+ size_t n_src_gather = ttd.src_gather .size ();
153+ const double * src_gather = ttd.src_gather .data ();
154+ nrn_pragma_acc (enter data create (src_gather[:n_src_gather]))
155+ nrn_pragma_omp (target enter data map (alloc: src_gather[:n_src_gather]))
160156 }
161157
162158 if (ttd.insrc_indices .size ()) {
163- int * insrc_indices = ttd.insrc_indices .data ();
164- size_t n_insrc_indices = ttd.insrc_indices .size ();
165- nrn_pragma_acc (
166- enter data copyin (insrc_indices [0 :n_insrc_indices]) if (nt->compute_gpu ))
167- // clang-format off
168- nrn_pragma_omp (target enter data map (to: insrc_indices[0 :n_insrc_indices])
169- if (nt->compute_gpu ))
170- // clang-format on
159+ cnrn_target_copyin (ttd.insrc_indices .data (), ttd.insrc_indices .size ());
160+ }
161+ }
162+ }
163+
164+ void nrn_partrans::delete_gap_indices_from_device () {
165+ if (insrcdspl_) {
166+ int n_insrc_buf = insrcdspl_[nrnmpi_numprocs];
167+ cnrn_target_delete (insrc_buf_, n_insrc_buf);
168+ }
169+ for (int tid = 0 ; tid < nrn_nthread; ++tid) {
170+ const NrnThread* nt = nrn_threads + tid;
171+ if (!nt->compute_gpu ) {
172+ continue ;
173+ }
174+
175+ TransferThreadData& ttd = transfer_thread_data_[tid];
176+
177+ if (!ttd.src_indices .empty ()) {
178+ cnrn_target_delete (ttd.src_indices .data (), ttd.src_indices .size ());
179+ cnrn_target_delete (ttd.src_gather .data (), ttd.src_gather .size ());
180+ }
181+
182+ if (!ttd.insrc_indices .empty ()) {
183+ cnrn_target_delete (ttd.insrc_indices .data (), ttd.insrc_indices .size ());
171184 }
172185 }
173186}
0 commit comments