fixups for llvm

olupton · pramodk · commit b440d11408a4 · 2021-12-31T14:02:58.000+01:00
diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake
@@ -73,14 +73,17 @@ if(CORENRN_ENABLE_GPU)
   else()
     message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
   endif()
+  set(NVHPC_ACC_COMP_FLAGS) # no value: unsets the variable, so the linker-flag append below adds nothing
+  set(NVHPC_ACC_LINK_FLAGS) # also unset; NOTE(review): presumably a temporary LLVM workaround - confirm
   # avoid PGI adding standard compliant "-A" flags
-  set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
+  # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
   string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}")
   # Use `-Mautoinline` option to compile .cpp files generated from .mod files only. This is
   # especially needed when we compile with -O0 or -O1 optimisation level where we get link errors.
   # Use of `-Mautoinline` ensure that the necessary functions like `net_receive_kernel` are inlined
   # for OpenACC code generation.
   set(NVHPC_CXX_INLINE_FLAGS "-Mautoinline")
+  set(NVHPC_CXX_INLINE_FLAGS) # FIXME: discards the -Mautoinline value set just above (LLVM workaround?)
 endif()
 
 # =============================================================================
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -127,13 +127,13 @@ if(CORENRN_ENABLE_GPU)
   # CUDA_RESOLVE_DEVICE_SYMBOLS OFF)
 
   # Fail hard and early if we don't have the PGI/NVHPC compiler.
-  if(NOT CORENRN_HAVE_NVHPC_COMPILER)
-    message(
-      FATAL_ERROR
-        "GPU support is available via OpenACC using PGI/NVIDIA compilers."
-        " Use NVIDIA HPC SDK with -DCMAKE_C_COMPILER=nvc -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvc++"
-    )
-  endif()
+  # if(NOT CORENRN_HAVE_NVHPC_COMPILER)
+  #   message(
+  #     FATAL_ERROR
+  #       "GPU support is available via OpenACC using PGI/NVIDIA compilers."
+  #       " Use NVIDIA HPC SDK with -DCMAKE_C_COMPILER=nvc -DCMAKE_CUDA_COMPILER=nvcc -DCMAKE_CXX_COMPILER=nvc++"
+  #   )
+  # endif()
 
   # Set some sensible default CUDA architectures.
   if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
diff --git a/coreneuron/CMakeLists.txt b/coreneuron/CMakeLists.txt
@@ -198,6 +198,12 @@ if(CORENRN_ENABLE_MPI AND NOT CORENRN_ENABLE_MPI_DYNAMIC)
   target_link_libraries(coreneuron ${MPI_CXX_LIBRARIES})
 endif()
 
+# Link the CUDA runtime only in GPU builds; the CUDA::cudart imported target is presumably
+# not defined otherwise (confirm). Plain signature matches the other calls on this target.
+if(CORENRN_ENABLE_GPU)
+  target_link_libraries(coreneuron CUDA::cudart)
+endif()
+
 # this is where we handle dynamic mpi library build
 if(CORENRN_ENABLE_MPI AND CORENRN_ENABLE_MPI_DYNAMIC)
   # ~~~
diff --git a/coreneuron/permute/cellorder.cpp b/coreneuron/permute/cellorder.cpp
@@ -490,6 +490,7 @@ static void triang_interleaved2(NrnThread* nt, int icore, int ncycle, int* strid
     bool has_subtrees_to_compute = true;
 
     // clang-format off
+    // OL211207: check if we need an OpenMP directive here.
     nrn_pragma_acc(loop seq)
     for (; has_subtrees_to_compute; ) {  // ncycle loop
 #ifndef CORENEURON_ENABLE_GPU
@@ -538,6 +539,7 @@ static void bksub_interleaved2(NrnThread* nt,
 #ifndef CORENEURON_ENABLE_GPU
     for (int i = root; i < lastroot; i += 1) {
 #else
+    // OL211207: check if we need an OpenMP directive here.
     nrn_pragma_acc(loop seq)
     for (int i = root; i < lastroot; i += warpsize) {
 #endif
@@ -661,7 +663,7 @@ void solve_interleaved1(int ith) {
                                          lastnode [0:ncell],
                                          cellsize [0:ncell]) if (nt->compute_gpu)
                        async(nt->stream_id))
-    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
+    // nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
     for (int icell = 0; icell < ncell; ++icell) {
         int icellsize = cellsize[icell];
         triang_interleaved(nt, icell, icellsize, nstride, stride, lastnode);