facebookresearch
diff --git a/docs/source/framework/pytorch_integration/writing_layers.rst b/docs/source/framework/pytorch_integration/writing_layers.rst
Lines changed: 0 additions & 42 deletions
diff --git a/tc/autotuner/genetic_autotuner.cc b/tc/autotuner/genetic_autotuner.cc
Lines changed: 0 additions & 5 deletions
diff --git a/tc/autotuner/genetic_autotuner_aten.cc b/tc/autotuner/genetic_autotuner_aten.cc
Lines changed: 0 additions & 1 deletion
diff --git a/tc/autotuner/genetic_tuning_harness.cc b/tc/autotuner/genetic_tuning_harness.cc
Lines changed: 0 additions & 1 deletion
diff --git a/tc/benchmarks/benchmark_fixture.h b/tc/benchmarks/benchmark_fixture.h
Lines changed: 0 additions & 2 deletions
diff --git a/tc/core/cuda/cuda_compilation_cache.cc b/tc/core/cuda/cuda_compilation_cache.cc
Lines changed: 0 additions & 208 deletions
@@ -219,48 +219,6 @@ adopt whatever feels more convenient.
     inp = torch.ones(1, 1, 4, 4).cuda()
     out = avgpool(inp)
 
-
-Manually injecting external CUDA code
--------------------------------------
-
-If you have an external efficient CUDA code that you want to use rather than
-the CUDA code that TC generates, you can inject your code easily. For this,
-you need to create a string which has the CUDA code you want to inject and you
-need to pass the name of the kernel and the CUDA code string to the :code:`tc.define`
-call. For example:
-
-.. code-block:: python
-
-    import tensor_comprehensions as tc
-    import torch
-    lang = """
-    def add(float(N) A, float(N) B) -> (output) {
-        output(n) = A(n) + B(n)
-    }
-    """
-
-    cuda_code = """
-    extern "C"{
-    __global__ void my_add(float* __restrict__ output, const float* __restrict__ A, const float* __restrict B)
-    {
-        int t = threadIdx.x;
-        output[t] = A[t] + B[t];
-    }
-    }
-    """
-
-    add = tc.define(lang, name="add", inject_kernel="my_add", cuda_code=cuda_code)
-    a, b = torch.randn(100).cuda(), torch.randn(100).cuda()
-    out = add(a, b, grid=[1, 1, 1], block=[100, 1, 1])
-
-.. note::
-
-    In such cases, please note that TC doesn't modify the injected CUDA kernel. It will
-    simply run the kernel injected as is and TC will also not guarantee the performance
-    of the kernel. User needs to specify the :code:`grid` and :code:`block` values
-    when running the layer and TC will simply use those settings.
-
-
 Built-in Functions
 ------------------
 
 
@@ -43,10 +43,8 @@ namespace {
 
 void enableOrLoadCache(const std::string& filename) {
   tc::OptionsCache::enableCache();
-  tc::CudaCache::enableCache();
   if (!filename.empty()) {
     tc::OptionsCache::loadCacheFromProtobuf(tc::makeOptionsFilename(filename));
-    tc::CudaCache::loadCacheFromProtobuf(tc::makeCudaFilename(filename));
   }
 }
 } // namespace
@@ -62,9 +60,6 @@ void GeneticAutotuner::storeCaches(const std::string& filename) {
     tc::OptionsCache::dumpCacheToProtobuf(tc::makeOptionsFilename(filename));
 
     tc::OptionsCache::getCache()->keepOnlyBestCandidates(1);
-    tc::removeFromCudaCacheEntriesNotInOptionsCache(
-        *tc::CudaCache::getCache(), *tc::OptionsCache::getCache());
-    tc::CudaCache::dumpCacheToProtobuf(tc::makeCudaFilename(filename));
   }
 }
 
 
@@ -21,7 +21,6 @@
 #include <thread>
 
 #include "tc/core/cuda/cuda.h"
-#include "tc/core/cuda/cuda_compilation_cache.h"
 #include "tc/core/cuda/cuda_tc_executor.h"
 #include "tc/core/flags.h"
 #include "tc/core/scope_guard.h"
 
@@ -27,7 +27,6 @@
 #include "tc/autotuner/utils/printer.h"
 #include "tc/autotuner/utils/utils.h"
 #include "tc/core/cuda/cuda.h"
-#include "tc/core/cuda/cuda_compilation_cache.h"
 #include "tc/core/cuda/cuda_mapping_options_cpp_printer.h"
 #include "tc/core/cuda/cuda_tc_executor.h"
 #include "tc/core/execution_engine.h"
 
@@ -260,8 +260,6 @@ struct Benchmark : public ::testing::Test {
 
     tc::OptionsCache::enableCache();
     tc::OptionsCache::loadCacheFromProtobuf(cacheFilename + ".options");
-    tc::CudaCache::enableCache();
-    tc::CudaCache::loadCacheFromProtobuf(tc::makeCudaFilename(cacheFilename));
     tc::FLAGS_tuner_gen_restore_number = 1;
 
     tc::ATenCompilationUnit<tc::CudaTcExecutor> atCompl;
 
@@ -141,152 +141,6 @@ CachedEntryType* searchKernel(
 }
 } // namespace
 
-////////////////////////////////////////////////////////////////////////////////
-// CudaCache
-////////////////////////////////////////////////////////////////////////////////
-std::shared_ptr<CudaCache>& CudaCache::getGlobalSharedCache() {
-  static std::shared_ptr<CudaCache> cudaCache_;
-  return cudaCache_;
-}
-
-CudaCachedEntry::CudaCachedEntry(
-    const std::string& id,
-    const std::string& kernelSpecializedName,
-    const std::vector<int>& kernelParameters,
-    const Grid& grid,
-    const Block& block,
-    const CudaMappingOptions& mappingOptions,
-    const std::vector<const DLTensor*>& inputs,
-    const std::vector<const DLTensor*>& outputs,
-    const std::string& cudaSource,
-    const std::string& deviceStr)
-    : key{id,
-          mappingOptions,
-          DLTensorToTensorInfoVector(inputs),
-          DLTensorToTensorInfoVector(outputs),
-          deviceStr,
-          git_version},
-      values{cudaSource, kernelSpecializedName, kernelParameters, grid, block} {
-}
-
-CudaCachedEntry::CudaCachedEntry(const CudaCacheEntryProto& buf)
-    : key{buf.id(),
-          CudaMappingOptions{buf.kernel_options()},
-          ProtoToTensorInfoVector(buf.inputs()),
-          ProtoToTensorInfoVector(buf.outputs()),
-          buf.device_str(),
-          buf.git_version()},
-      values{buf.cuda_source(),
-             buf.specialized_name(),
-             std::vector<int>{buf.parameters().begin(), buf.parameters().end()},
-             Grid(buf.grid_dims()),
-             Block(buf.block_dims())} {}
-
-CudaCache::CudaCache(const CudaCacheProto& buf) {
-  entries_.reserve(buf.entries_size());
-  for (const auto& entry_buf : buf.entries())
-    entries_.emplace_back(entry_buf);
-}
-
-void CudaCache::cacheKernel(CudaCachedEntry&& entry) {
-  std::lock_guard<std::mutex> lock(mtx_);
-  ++numberCacheAttemps;
-  auto retrievedEntry = searchKernel(
-      entries_,
-      entry.key.id,
-      entry.key.mappingOptions,
-      entry.key.inputs,
-      entry.key.outputs);
-  if (retrievedEntry) {
-    if (retrievedEntry->values.cudaSource != entry.values.cudaSource or
-        retrievedEntry->values.grid != entry.values.grid or
-        retrievedEntry->values.block != entry.values.block) {
-      throw CacheEntrySameKeyDifferentValue(
-          "CudaCache::CacheKernel: a kernel matching the id, options and "
-          "inputs was previously cached with different cuda source or block "
-          "or grid dimensions.");
-    }
-    return;
-  }
-  entries_.emplace_back(std::move(entry));
-}
-
-std::unique_ptr<CudaCacheRetrievalResult> CudaCache::retrieveKernel(
-    const std::string& id,
-    const CudaMappingOptions& options,
-    const std::vector<const DLTensor*>& inputs,
-    const std::vector<const DLTensor*>& outputs) const {
-  std::lock_guard<std::mutex> lock(mtx_);
-  ++numberAttemptedRetrievals;
-  auto entry = searchKernel(entries_, id, options, inputs, outputs);
-  if (not entry) {
-    return nullptr;
-  }
-  ++numberSuccessfulRetrievals;
-  return std::unique_ptr<CudaCacheRetrievalResult>(
-      new CudaCacheRetrievalResult{entry->values.cudaSource,
-                                   entry->values.kernelSpecializedName,
-                                   entry->values.kernelParameters,
-                                   entry->values.grid,
-                                   entry->values.block});
-}
-
-void CudaCache::removeEntriesNotInOptionsCache(const OptionsCache& oc) {
-  std::vector<CudaCachedEntry> newEntries;
-  for (const auto& entry : oc) {
-    for (const auto& options : entry.values) {
-      auto cudaEntry = searchKernel(
-          entries_,
-          entry.key.id,
-          options.mappingOptions,
-          entry.key.inputs,
-          entry.key.outputs);
-      if (cudaEntry) {
-        newEntries.push_back(std::move(*cudaEntry));
-      }
-    }
-  }
-  entries_ = std::move(newEntries);
-}
-
-CudaCacheProto CudaCache::toProtobuf() const {
-  CudaCacheProto buf;
-  auto* entriesBuf = buf.mutable_entries();
-  entriesBuf->Reserve(entries_.size());
-  std::transform(
-      entries_.begin(),
-      entries_.end(),
-      google::protobuf::RepeatedPtrFieldBackInserter(entriesBuf),
-      [](const CudaCachedEntry& entry) { return entry.toProtobuf(); });
-  return buf;
-}
-
-CudaCacheEntryProto CudaCachedEntry::toProtobuf() const {
-  CudaCacheEntryProto buf;
-  buf.set_id(key.id);
-  *buf.mutable_kernel_options() = key.mappingOptions.proto();
-  std::transform(
-      key.inputs.begin(),
-      key.inputs.end(),
-      google::protobuf::RepeatedPtrFieldBackInserter(buf.mutable_inputs()),
-      [](const detail::TensorInfo& input) { return input.toProtobuf(); });
-  std::transform(
-      key.outputs.begin(),
-      key.outputs.end(),
-      google::protobuf::RepeatedPtrFieldBackInserter(buf.mutable_outputs()),
-      [](const detail::TensorInfo& output) { return output.toProtobuf(); });
-  buf.set_device_str(key.deviceStr);
-  buf.set_git_version(key.gitVersion);
-
-  buf.set_cuda_source(values.cudaSource);
-  *buf.mutable_grid_dims() = values.grid.view.proto;
-  *buf.mutable_block_dims() = values.block.view.proto;
-  buf.set_specialized_name(values.kernelSpecializedName);
-  WriteProtobufArray(values.kernelParameters, buf.mutable_parameters());
-
-  return buf;
-}
-
 ////////////////////////////////////////////////////////////////////////////////
 // OptionsCache
 ////////////////////////////////////////////////////////////////////////////////
@@ -566,66 +420,4 @@ void OptionsCache::keepOnlyBestCandidates(size_t numberToKeep) {
     }
   }
 }
-
-////////////////////////////////////////////////////////////////////////////////
-// ManualCudaCache
-////////////////////////////////////////////////////////////////////////////////
-std::shared_ptr<ManualCudaCache>& ManualCudaCache::getGlobalSharedCache() {
-  static std::shared_ptr<ManualCudaCache> manualCudaCache_;
-  return manualCudaCache_;
-}
-
-ManualCudaCachedEntry::ManualCudaCachedEntry(
-    const std::string& id,
-    const std::string& kernelSpecializedName,
-    const std::vector<int>& kernelParameters,
-    const Grid& grid,
-    const Block& block,
-    const std::vector<const DLTensor*>& inputs,
-    const std::vector<const DLTensor*>& outputs,
-    const std::string& cudaSource,
-    const std::string& deviceStr)
-    : key{id,
-          DLTensorToTensorInfoVector(inputs),
-          DLTensorToTensorInfoVector(outputs),
-          deviceStr,
-          git_version},
-      values{cudaSource, kernelSpecializedName, kernelParameters, grid, block} {
-}
-
-void ManualCudaCache::cacheKernel(ManualCudaCachedEntry&& entry) {
-  std::lock_guard<std::mutex> lock(mtx_);
-  ++numberCacheAttemps;
-  auto retrievedEntry =
-      searchKernel(entries_, entry.key.id, entry.key.inputs, entry.key.outputs);
-  if (retrievedEntry) {
-    retrievedEntry->values.grid = entry.values.grid;
-    retrievedEntry->values.block = entry.values.block;
-    retrievedEntry->values.cudaSource = entry.values.cudaSource;
-    retrievedEntry->values.kernelSpecializedName =
-        entry.values.kernelSpecializedName;
-    retrievedEntry->values.kernelParameters = entry.values.kernelParameters;
-    return;
-  }
-  entries_.emplace_back(std::move(entry));
-}
-
-std::unique_ptr<ManualCudaCacheRetrievalResult> ManualCudaCache::retrieveKernel(
-    const std::string& id,
-    const std::vector<const DLTensor*>& inputs,
-    const std::vector<const DLTensor*>& outputs) const {
-  std::lock_guard<std::mutex> lock(mtx_);
-  ++numberAttemptedRetrievals;
-  auto entry = searchKernel(entries_, id, inputs, outputs);
-  if (not entry) {
-    return nullptr;
-  }
-  ++numberSuccessfulRetrievals;
-  return std::unique_ptr<ManualCudaCacheRetrievalResult>(
-      new ManualCudaCacheRetrievalResult{entry->values.cudaSource,
-                                         entry->values.kernelSpecializedName,
-                                         entry->values.kernelParameters,
-                                         entry->values.grid,
-                                         entry->values.block});
-}
 } // namespace tc
Original file line number	Diff line number	Diff line change
`@@ -43,10 +43,8 @@ namespace {`
`43`	`43`
`44`	`44`	`void enableOrLoadCache(const std::string& filename) {`
`45`	`45`	`tc::OptionsCache::enableCache();`
`46`		`- tc::CudaCache::enableCache();`
`47`	`46`	`if (!filename.empty()) {`
`48`	`47`	`tc::OptionsCache::loadCacheFromProtobuf(tc::makeOptionsFilename(filename));`
`49`		`- tc::CudaCache::loadCacheFromProtobuf(tc::makeCudaFilename(filename));`
`50`	`48`	`}`
`51`	`49`	`}`
`52`	`50`	`} // namespace`
`@@ -62,9 +60,6 @@ void GeneticAutotuner::storeCaches(const std::string& filename) {`
`62`	`60`	`tc::OptionsCache::dumpCacheToProtobuf(tc::makeOptionsFilename(filename));`
`63`	`61`
`64`	`62`	`tc::OptionsCache::getCache()->keepOnlyBestCandidates(1);`
`65`		`- tc::removeFromCudaCacheEntriesNotInOptionsCache(`
`66`		`- *tc::CudaCache::getCache(), *tc::OptionsCache::getCache());`
`67`		`- tc::CudaCache::dumpCacheToProtobuf(tc::makeCudaFilename(filename));`
`68`	`63`	`}`
`69`	`64`	`}`
`70`	`65`