@@ -65,6 +65,17 @@ void checkOrCreateContext() {
6565}
6666
6767namespace {
68+ static void checkedSystemCall (
69+ const std::string& cmd,
70+ const std::vector<std::string>& args) {
71+ std::stringstream command;
72+ command << cmd << " " ;
73+ for (const auto & s : args) {
74+ command << s << " " ;
75+ }
76+ TC_CHECK_EQ (std::system (command.str ().c_str ()), 0 ) << command.str ();
77+ }
78+
6879static std::tuple<int , int , int > getCudaArchitecture () {
6980 int device, major, minor;
7081 CUdevice deviceHandle;
@@ -107,30 +118,48 @@ static std::string llvmCompile(
107118 std::remove (outputPtxFile.c_str ());
108119 });
109120
110- std::string cmdLlvmIr = std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) +
111- " /clang++ -x cuda " + inputFileName + " " + " --cuda-device-only " +
112- " --cuda-gpu-arch=" + arch + " " +
113- " --cuda-path=" + TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR) + " " + " -I" +
114- TC_STRINGIFY (TC_CUDA_INCLUDE_DIR) + " " + " -I" +
115- TC_STRINGIFY (TC_CUB_INCLUDE_DIR) + " " + tc::FLAGS_llvm_flags +
116- " -DNVRTC_CUB=1 " + " -nocudalib -S -emit-llvm " + " -o " +
117- outputClangFile;
118- TC_CHECK_EQ (std::system (cmdLlvmIr.c_str ()), 0 ) << cmdLlvmIr;
119-
120- std::string cmdLlvmLink = std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) +
121- " /llvm-link " + outputClangFile + " " +
122- TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR) +
123- " /nvvm/libdevice/libdevice.*.bc " + " -S -o " + outputLinkFile;
124- TC_CHECK_EQ (std::system (cmdLlvmLink.c_str ()), 0 ) << cmdLlvmLink;
125-
126- std::string cmdOpt = std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /opt " +
127- " -internalize -internalize-public-api-list=" + name + " " +
128- " -nvvm-reflect -O3 " + outputLinkFile + " -S -o " + outputOptFile;
129- TC_CHECK_EQ (std::system (cmdOpt.c_str ()), 0 ) << cmdOpt;
130-
131- std::string cmdPtx = std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) +
132- " /llc -mcpu=" + arch + " " + outputOptFile + " -o " + outputPtxFile;
133- TC_CHECK_EQ (std::system (cmdPtx.c_str ()), 0 ) << cmdPtx;
121+ // Compile
122+ checkedSystemCall (
123+ std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /clang++" ,
124+ {" -x cuda " + inputFileName,
125+ " --cuda-device-only" ,
126+ std::string (" --cuda-gpu-arch=" ) + arch,
127+ std::string (" --cuda-path=" ) + TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR),
128+ std::string (" -I" ) + TC_STRINGIFY (TC_CUDA_INCLUDE_DIR),
129+ std::string (" -I" ) + TC_STRINGIFY (TC_CUB_INCLUDE_DIR),
130+ tc::FLAGS_llvm_flags,
131+ " -DNVRTC_CUB=1" ,
132+ " -nocudalib" ,
133+ " -S" ,
134+ " -emit-llvm" ,
135+ " -o " + outputClangFile});
136+
137+ // Link libdevice before opt
138+ checkedSystemCall (
139+ std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /llvm-link " ,
140+ {outputClangFile,
141+ std::string (TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR)) +
142+ " /nvvm/libdevice/libdevice.*.bc" ,
143+ " -S" ,
144+ " -o " + outputLinkFile});
145+
146+ // Opt
147+ checkedSystemCall (
148+ std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /opt" ,
149+ {" -internalize" ,
150+ std::string (" -internalize-public-api-list=" ) + name,
151+ " -nvvm-reflect" ,
152+ " -O3" ,
153+ outputLinkFile,
154+ " -S" ,
155+ std::string (" -o " ) + outputOptFile});
156+
157+ // Ptx
158+ checkedSystemCall (
159+ std::string (TC_STRINGIFY (TC_LLVM_BIN_DIR)) + " /llc" ,
160+ {std::string (" -mcpu=" ) + arch,
161+ outputOptFile,
162+ std::string (" -o " ) + outputPtxFile});
134163
135164 std::ifstream stream (outputPtxFile);
136165 return std::string (
@@ -160,12 +189,16 @@ static std::string nvccCompile(
160189 // cstdio's std::remove to delete files
161190 tc::ScopeGuard sgo ([&]() { std::remove (outputPtxFile.c_str ()); });
162191
163- std::string cmdPtx = std::string (TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR)) +
164- " /bin/nvcc -x cu " + inputFileName + " --gpu-architecture=" + arch + " " +
165- " --ptx " + " -I" + TC_STRINGIFY (TC_CUDA_INCLUDE_DIR) + " " + " -I" +
166- TC_STRINGIFY (TC_CUB_INCLUDE_DIR) + " " + tc::FLAGS_nvcc_flags + " -o " +
167- outputPtxFile;
168- TC_CHECK_EQ (std::system (cmdPtx.c_str ()), 0 ) << cmdPtx;
192+ checkedSystemCall (
193+ std::string (TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR)) + " /bin/nvcc" ,
194+ {" -x cu" ,
195+ inputFileName,
196+ std::string (" --gpu-architecture=" ) + arch,
197+ " --ptx" ,
198+ std::string (" -I" ) + TC_STRINGIFY (TC_CUDA_INCLUDE_DIR),
199+ std::string (" -I" ) + TC_STRINGIFY (TC_CUB_INCLUDE_DIR),
200+ tc::FLAGS_nvcc_flags,
201+ std::string (" -o " ) + outputPtxFile});
169202
170203 std::ifstream stream (outputPtxFile);
171204 return std::string (
0 commit comments