Various changes (including temporary ones) to get the XL OpenMP offload build working

Pramod Kumbhar · pramodk · commit fbf95519c456 · 2021-12-31T14:02:58.000+01:00
* todo: temporary changes to OpenAccHelper.cmake, needs refinement
 * todo: see caliper linking issue
 * todo: _OPENACC needs to be renamed to CORENRN_ENABLE_GPU so that OpenMP-based
         builds can use GPU offload.
 * todo: hardcoded CXX flags for quick build
diff --git a/CMake/OpenAccHelper.cmake b/CMake/OpenAccHelper.cmake
@@ -50,31 +50,44 @@ if(CORENRN_ENABLE_GPU)
     endif()
     set(CORENRN_CUDA_VERSION_SHORT "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
   endif()
-  # -cuda links CUDA libraries and also seems to be important to make the NVHPC do the device code
-  # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code
-  # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in
-  # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP
-  # code is compiled with the same CUDA version as the explicit CUDA code.
-  set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo")
-  # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
-  # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
-  # same default compute capabilities as each other, particularly on GPU-less build machines.
-  foreach(compute_capability ${CMAKE_CUDA_ARCHITECTURES})
-    string(APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}")
-  endforeach()
-  if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP")
-    # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
-    # for a region then prefer OpenMP.
-    add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD)
-    string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu")
-  elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC")
-    # Only enable OpenACC offload for GPU
-    string(APPEND NVHPC_ACC_COMP_FLAGS " -acc")
+  if(CORENRN_HAVE_NVHPC_COMPILER)
+    # -cuda links CUDA libraries and also seems to be important to make the NVHPC do the device code
+    # linking. Without this, we had problems with linking between the explicit CUDA (.cu) device code
+    # and offloaded OpenACC/OpenMP code. Using -cuda when compiling seems to improve error messages in
+    # some cases, and to be recommended by NVIDIA. We pass -gpu=cudaX.Y to ensure that OpenACC/OpenMP
+    # code is compiled with the same CUDA version as the explicit CUDA code.
+    set(NVHPC_ACC_COMP_FLAGS "-cuda -gpu=cuda${CORENRN_CUDA_VERSION_SHORT},lineinfo")
+    # Make sure that OpenACC code is generated for the same compute capabilities as the explicit CUDA
+    # code. Otherwise there may be confusing linker errors. We cannot rely on nvcc and nvc++ using the
+    # same default compute capabilities as each other, particularly on GPU-less build machines.
+    foreach(compute_capability ${CMAKE_CUDA_ARCHITECTURES})
+      string(APPEND NVHPC_ACC_COMP_FLAGS ",cc${compute_capability}")
+    endforeach()
+    if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP")
+      # Prefer OpenMP target offload when a region has both OpenACC and OpenMP directives. Define
+      # both spellings: offload.hpp tests CORENRN_*; CORENEURON_* is kept in case it is still used.
+      add_compile_definitions(CORENEURON_PREFER_OPENMP_OFFLOAD CORENRN_PREFER_OPENMP_OFFLOAD)
+      string(APPEND NVHPC_ACC_COMP_FLAGS " -mp=gpu")
+    elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC")
+      # Only enable OpenACC offload for GPU
+      string(APPEND NVHPC_ACC_COMP_FLAGS " -acc")
+    else()
+      message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
+    endif()
+  elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "XLClang")
+    set(NVHPC_ACC_COMP_FLAGS "-qsmp=omp -qoffload -qreport")
+    set(NVHPC_ACC_LINK_FLAGS "-qcuda -lcaliper")
+
+    if(CORENRN_ENABLE_OPENMP AND CORENRN_ENABLE_OPENMP_OFFLOAD)
+      # Enable OpenMP target offload to GPU and if both OpenACC and OpenMP directives are available
+      # for a region then prefer OpenMP.
+      add_compile_definitions(CORENRN_PREFER_OPENMP_OFFLOAD)
+    endif()
+  elseif(${CMAKE_CXX_COMPILER_ID} STREQUAL "Clang")
+    set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}")
   else()
-    message(FATAL_ERROR "${CORENRN_ACCELERATOR_OFFLOAD} not supported with NVHPC compilers")
+    message(FATAL_ERROR "${CMAKE_CXX_COMPILER_ID} is not supported in GPU builds.")
   endif()
-  set(NVHPC_ACC_COMP_FLAGS "-fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Wno-unknown-cuda-version -I${CUDAToolkit_INCLUDE_DIRS}")
-  set(NVHPC_ACC_LINK_FLAGS)
   # avoid PGI adding standard compliant "-A" flags
   # set(CMAKE_CXX14_STANDARD_COMPILE_OPTION --c++14)
   string(APPEND CMAKE_EXE_LINKER_FLAGS " ${NVHPC_ACC_COMP_FLAGS}")
diff --git a/coreneuron/utils/offload.hpp b/coreneuron/utils/offload.hpp
@@ -9,7 +9,7 @@
 #include <cstddef>
 
 #define nrn_pragma_stringify(x) #x
-#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
+#if defined(CORENEURON_ENABLE_GPU) && defined(CORENRN_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
 #define nrn_pragma_acc(x)
 #define nrn_pragma_omp(x) _Pragma(nrn_pragma_stringify(omp x))
 #include <omp.h>
diff --git a/extra/nrnivmodl_core_makefile.in b/extra/nrnivmodl_core_makefile.in
@@ -71,7 +71,7 @@ ifeq ($(wildcard $(CORENRN_PERLEXE)),)
   CORENRN_PERLEXE=perl
 endif
 
-CXXFLAGS = @CORENRN_CXX_FLAGS@
+CXXFLAGS = @CORENRN_CXX_FLAGS@ -lcaliper -qcuda
 CXX_COMPILE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_CXX_COMPILE_OPTIONS_PIC@ @CORENRN_COMMON_COMPILE_DEFS@ $(INCLUDES)
 CXX_LINK_EXE_CMD = $(CXX) $(CXXFLAGS) @CMAKE_EXE_LINKER_FLAGS@
 CXX_SHARED_LIB_CMD = $(CXX) $(CXXFLAGS) @CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS@ @CMAKE_SHARED_LIBRARY_CXX_FLAGS@ @CMAKE_SHARED_LINKER_FLAGS@