|
34 | 34 |
|
35 | 35 | namespace tc { |
36 | 36 |
|
37 | | -class OptionsCache; |
38 | | - |
39 | | -//////////////////////////////////////////////////////////////////////////////// |
40 | | -// CudaCache |
41 | | -//////////////////////////////////////////////////////////////////////////////// |
42 | | -struct CudaCachedEntry { |
43 | | - CudaCachedEntry( |
44 | | - const std::string& id, |
45 | | - const std::string& kernelSpecializedName, |
46 | | - const std::vector<int>& kernelParameters, |
47 | | - const Grid& grid, |
48 | | - const Block& block, |
49 | | - const CudaMappingOptions& mappingOptions, |
50 | | - const std::vector<const DLTensor*>& inputs, |
51 | | - const std::vector<const DLTensor*>& outputs, |
52 | | - const std::string& cudaSource, |
53 | | - const std::string& deviceStr); |
54 | | - |
55 | | - CudaCachedEntry(const CudaCacheEntryProto& buf); |
56 | | - CudaCacheEntryProto toProtobuf() const; |
57 | | - |
58 | | - struct Key { |
59 | | - std::string id; |
60 | | - CudaMappingOptions mappingOptions; |
61 | | - std::vector<detail::TensorInfo> inputs; |
62 | | - std::vector<detail::TensorInfo> outputs; |
63 | | - std::string deviceStr; |
64 | | - std::string gitVersion; |
65 | | - }; |
66 | | - |
67 | | - struct Values { |
68 | | - std::string cudaSource; |
69 | | - std::string kernelSpecializedName; |
70 | | - std::vector<int> kernelParameters; |
71 | | - Grid grid; |
72 | | - Block block; |
73 | | - }; |
74 | | - Key key; |
75 | | - Values values; |
76 | | -}; |
77 | | - |
78 | | -struct CudaCacheRetrievalResult { |
79 | | - std::string source; |
80 | | - std::string specializedName; |
81 | | - std::vector<int> parameters; |
82 | | - Grid grid; |
83 | | - Block block; |
84 | | -}; |
85 | | - |
86 | | -/** |
87 | | - * CudaCache stores the Cuda source of optimized kernels |
88 | | - * A CudaCache holds multiple CudaCachedEntry's. |
89 | | - * Each CudaCachedEntry is split to two conceptual parts the key and the values. |
90 | | - * The values are: |
91 | | - * the specialized (wrt inputs) Cuda source code, |
92 | | - * the kernel's specialized name, |
93 | | - * the kernel parameters, |
94 | | - * the Cuda block and grid dimensions |
95 | | - * The key is: |
96 | | - * the kernel/op's unique id (string), |
97 | | - * the specialized input dimensions, |
98 | | - * the isl options when the kernel was optimized, |
99 | | - * the target architecture (string), |
100 | | - * tc's version (string), |
101 | | - */ |
102 | | -class CudaCache : public Cache<CudaCache, CudaCachedEntry> { |
103 | | - public: |
104 | | - using ProtobufType = CudaCacheProto; |
105 | | - using CachedEntry = CudaCachedEntry; |
106 | | - using RetrievalResult = CudaCacheRetrievalResult; |
107 | | - static std::shared_ptr<CudaCache>& getGlobalSharedCache(); |
108 | | - |
109 | | - CudaCache() = default; |
110 | | - CudaCache(const CudaCacheProto& buf); |
111 | | - CudaCacheProto toProtobuf() const; |
112 | | - |
113 | | - /** |
114 | | - * If op was previously cached and the inputs' shape, isl options, and the |
115 | | - * target device are the same then this is a noop |
116 | | - * Else (cudaSource, grid, block) is stored in the cache |
117 | | - */ |
118 | | - void cacheKernel(CudaCachedEntry&& entry); |
119 | | - |
120 | | - /** |
121 | | - * Returns the cache entry that matches op (id, isl options, target device) |
122 | | - * and inputs' shapes. |
123 | | - */ |
124 | | - std::unique_ptr<CudaCacheRetrievalResult> retrieveKernel( |
125 | | - const std::string& id, |
126 | | - const CudaMappingOptions& options, |
127 | | - const std::vector<const DLTensor*>& inputs, |
128 | | - const std::vector<const DLTensor*>& outputs) const; |
129 | | - |
130 | | - void removeEntriesNotInOptionsCache(const OptionsCache& oc); |
131 | | -}; |
132 | | - |
133 | 37 | //////////////////////////////////////////////////////////////////////////////// |
134 | 38 | // OptionsCache |
135 | 39 | //////////////////////////////////////////////////////////////////////////////// |
@@ -231,6 +135,98 @@ class OptionsCache : public Cache<OptionsCache, OptionsCachedEntry> { |
231 | 135 | void keepOnlyBestCandidates(size_t numberToKeep); |
232 | 136 | }; |
233 | 137 |
|
////////////////////////////////////////////////////////////////////////////////
// CudaCache
////////////////////////////////////////////////////////////////////////////////
// A single CudaCache entry: the generated Cuda source and its launch
// configuration (Values), keyed by everything that determined that source
// (Key).  Convertible to/from CudaCacheEntryProto for (de)serialization.
struct CudaCachedEntry {
  // Builds an entry from the products of a compilation run.
  // id                    kernel/op unique identifier
  // kernelSpecializedName name of the input-specialized kernel
  // kernelParameters      scalar parameters the kernel was specialized with
  // grid, block           Cuda launch dimensions
  // mappingOptions        options the kernel was compiled with
  // inputs, outputs       tensors whose metadata (shape/type) forms the key
  // cudaSource            the specialized Cuda source code
  // deviceStr             target device identifier string
  CudaCachedEntry(
      const std::string& id,
      const std::string& kernelSpecializedName,
      const std::vector<int>& kernelParameters,
      const Grid& grid,
      const Block& block,
      const CudaMappingOptions& mappingOptions,
      const std::vector<const DLTensor*>& inputs,
      const std::vector<const DLTensor*>& outputs,
      const std::string& cudaSource,
      const std::string& deviceStr);

  // Deserializes an entry previously produced by toProtobuf().
  CudaCachedEntry(const CudaCacheEntryProto& buf);
  CudaCacheEntryProto toProtobuf() const;

  // Everything that uniquely identifies a compiled kernel.  Two compilations
  // with equal Keys are expected to produce the same Values.
  struct Key {
    std::string id; // kernel/op unique identifier
    CudaMappingOptions mappingOptions; // options used at compilation time
    std::vector<detail::TensorInfo> inputs; // input shapes/types
    std::vector<detail::TensorInfo> outputs; // output shapes/types
    std::string deviceStr; // target device identifier
    std::string gitVersion; // tc version that produced the entry
  };

  // The cached artifacts for a given Key.
  struct Values {
    std::string cudaSource; // input-specialized Cuda source
    std::string kernelSpecializedName;
    std::vector<int> kernelParameters;
    Grid grid; // launch grid dimensions
    Block block; // launch block dimensions
  };
  Key key;
  Values values;
};
| 176 | + |
// Value type returned by CudaCache::retrieveKernel: everything a caller needs
// to instantiate and launch the previously compiled kernel.
struct CudaCacheRetrievalResult {
  std::string source; // the specialized Cuda source
  std::string specializedName; // the specialized kernel name
  std::vector<int> parameters; // scalar kernel parameters
  Grid grid; // launch grid dimensions
  Block block; // launch block dimensions
};
| 184 | + |
| 185 | +/** |
| 186 | + * CudaCache stores the Cuda source of optimized kernels |
| 187 | + * A CudaCache holds multiple CudaCachedEntry's. |
| 188 | + * Each CudaCachedEntry is split to two conceptual parts the key and the values. |
| 189 | + * The values are: |
| 190 | + * the specialized (wrt inputs) Cuda source code, |
| 191 | + * the kernel's specialized name, |
| 192 | + * the kernel parameters, |
| 193 | + * the Cuda block and grid dimensions |
| 194 | + * The key is: |
| 195 | + * the kernel/op's unique id (string), |
| 196 | + * the specialized input dimensions, |
| 197 | + * the isl options when the kernel was optimized, |
| 198 | + * the target architecture (string), |
| 199 | + * tc's version (string), |
| 200 | + */ |
| 201 | +class CudaCache : public Cache<CudaCache, CudaCachedEntry> { |
| 202 | + public: |
| 203 | + typedef CudaCacheProto ProtobufType; |
| 204 | + static std::shared_ptr<CudaCache>& getGlobalSharedCache(); |
| 205 | + |
| 206 | + CudaCache() = default; |
| 207 | + CudaCache(const CudaCacheProto& buf); |
| 208 | + CudaCacheProto toProtobuf() const; |
| 209 | + |
| 210 | + /** |
| 211 | + * If op was previously cached and the inputs' shape, isl options, and the |
| 212 | + * target device are the same then this is a noop |
| 213 | + * Else (cudaSource, grid, block) is stored in the cache |
| 214 | + */ |
| 215 | + void cacheKernel(CudaCachedEntry&& entry); |
| 216 | + |
| 217 | + /** |
| 218 | + * Returns the cache entry that matches op (id, isl options, target device) |
| 219 | + * and inputs' shapes. |
| 220 | + */ |
| 221 | + std::unique_ptr<CudaCacheRetrievalResult> retrieveKernel( |
| 222 | + const std::string& id, |
| 223 | + const CudaMappingOptions& options, |
| 224 | + const std::vector<const DLTensor*>& inputs, |
| 225 | + const std::vector<const DLTensor*>& outputs) const; |
| 226 | + |
| 227 | + void removeEntriesNotInOptionsCache(const OptionsCache& oc); |
| 228 | +}; |
| 229 | + |
234 | 230 | //////////////////////////////////////////////////////////////////////////////// |
235 | 231 | // ManualCudaCache |
236 | 232 | //////////////////////////////////////////////////////////////////////////////// |
|
0 commit comments