This repository was archived by the owner on Apr 28, 2023. It is now read-only.
3 files changed: +9 -3
lines changed Original file line number Diff line number Diff line change @@ -31,9 +31,11 @@ namespace code {
3131namespace c {
3232
3333constexpr auto types = R"C(
34+ #ifndef __CUDACC_RTC__
3435// Can't include system dependencies with NVRTC
3536// Can't include cuda_fp16.h with NVRTC due to transitive system dependencies
36- // #include <cuda_fp16.h>
37+ #include <cuda_fp16.h>
38+ #endif
3739)C" ;
3840
3941constexpr auto defines = R"C(
@@ -211,7 +213,12 @@ struct SegmentedReducer {
211213
212214constexpr auto cubBlockReduce = R"CUDA(
213215
216+ #if __CUDACC_RTC__
214217#include "cub/nvrtc_cub.cuh"
218+ #else
219+ #include <assert.h>
220+ #include "cub/cub.cuh"
221+ #endif
215222
216223namespace __tc {
217224
Original file line number Diff line number Diff line change @@ -128,7 +128,6 @@ static std::string llvmCompile(
128128 std::string (" -I" ) + TC_STRINGIFY (TC_CUDA_INCLUDE_DIR),
129129 std::string (" -I" ) + TC_STRINGIFY (TC_CUB_INCLUDE_DIR),
130130 tc::FLAGS_llvm_flags,
131- " -DNVRTC_CUB=1" ,
132131 " -nocudalib" ,
133132 " -S" ,
134133 " -emit-llvm" ,
Original file line number Diff line number Diff line change @@ -49,7 +49,7 @@ DEFINE_string(
4949 " compiler flags to set when llvm is used" );
5050DEFINE_string (
5151 nvcc_flags,
52- " -std=c++11 -ptx -DNVRTC_CUB=1 --use_fast_math" ,
52+ " -std=c++11 -ptx --use_fast_math" ,
5353 " compiler flags to set when nvcc is used" );
5454
5555// CPU codegen options
You can’t perform that action at this time.
0 commit comments