@@ -138,6 +138,41 @@ static std::string llvmCompile(
138138 std::istreambuf_iterator<char >());
139139}
140140
141+ static std::string nvccCompile (
142+ const std::string& name,
143+ const std::string& source) {
144+ int device, major, minor;
145+ std::tie (device, major, minor) = getCudaArchitecture ();
146+
147+ std::string pat (" /tmp/cudaXXXXXX" );
148+ std::vector<char > ifn (pat.begin (), pat.end ());
149+ TC_CHECK_GE (mkstemp (ifn.data ()), 0 ); // string.c_str is const char*
150+ std::string inputFileName (ifn.begin (), ifn.end ());
151+ // cstdio's std::remove to delete files
152+ tc::ScopeGuard sgi ([&]() { std::remove (inputFileName.c_str ()); });
153+ {
154+ std::ofstream ostream (inputFileName, std::ios::binary);
155+ ostream << source;
156+ }
157+
158+ std::string arch = " sm_" + std::to_string (major) + std::to_string (minor);
159+ std::string outputPtxFile = inputFileName + " .ptx" ;
160+ // cstdio's std::remove to delete files
161+ tc::ScopeGuard sgo ([&]() { std::remove (outputPtxFile.c_str ()); });
162+
163+ std::string cmdPtx = std::string (TC_STRINGIFY (TC_CUDA_TOOLKIT_ROOT_DIR)) +
164+ " /bin/nvcc -x cu " + inputFileName + " --gpu-architecture=" + arch + " " +
165+ " --ptx " + " -I" + TC_STRINGIFY (TC_CUDA_INCLUDE_DIR) + " " + " -I" +
166+ TC_STRINGIFY (TC_CUB_INCLUDE_DIR) + " " + tc::FLAGS_nvcc_flags + " -o " +
167+ outputPtxFile;
168+ TC_CHECK_EQ (std::system (cmdPtx.c_str ()), 0 ) << cmdPtx;
169+
170+ std::ifstream stream (outputPtxFile);
171+ return std::string (
172+ (std::istreambuf_iterator<char >(stream)),
173+ std::istreambuf_iterator<char >());
174+ }
175+
141176static std::string nvrtcCompile (
142177 const std::string& name,
143178 const std::string& source) {
@@ -209,8 +244,7 @@ std::unique_ptr<CudaRTCFunction> CudaRTCFunction::Compile(
209244 } else if (FLAGS_cuda_compiler == " llvm" ) {
210245 res->ptx = llvmCompile (name, source);
211246 } else if (FLAGS_cuda_compiler == " nvcc" ) {
212- CHECK (false ) << " NYI" ;
213- // res->ptx = llvmCompile(name, source);
247+ res->ptx = nvccCompile (name, source);
214248 } else {
215249 CHECK (false ) << " Unknown CUDA compiler: " << FLAGS_cuda_compiler;
216250 }
0 commit comments