|
34 | 34 |
|
35 | 35 | namespace tc { |
36 | 36 |
|
37 | | -class OptionsCache; |
38 | | - |
39 | | -//////////////////////////////////////////////////////////////////////////////// |
40 | | -// CudaCache |
41 | | -//////////////////////////////////////////////////////////////////////////////// |
42 | | -struct CudaCachedEntry { |
43 | | - CudaCachedEntry( |
44 | | - const std::string& id, |
45 | | - const std::string& kernelSpecializedName, |
46 | | - const std::vector<int>& kernelParameters, |
47 | | - const Grid& grid, |
48 | | - const Block& block, |
49 | | - const CudaMappingOptions& mappingOptions, |
50 | | - const std::vector<const DLTensor*>& inputs, |
51 | | - const std::vector<const DLTensor*>& outputs, |
52 | | - const std::string& cudaSource, |
53 | | - const std::string& deviceStr); |
54 | | - |
55 | | - CudaCachedEntry(const CudaCacheEntryProto& buf); |
56 | | - CudaCacheEntryProto toProtobuf() const; |
57 | | - |
58 | | - struct Key { |
59 | | - std::string id; |
60 | | - CudaMappingOptions mappingOptions; |
61 | | - std::vector<detail::TensorInfo> inputs; |
62 | | - std::vector<detail::TensorInfo> outputs; |
63 | | - std::string deviceStr; |
64 | | - std::string gitVersion; |
65 | | - }; |
66 | | - |
67 | | - struct Values { |
68 | | - std::string cudaSource; |
69 | | - std::string kernelSpecializedName; |
70 | | - std::vector<int> kernelParameters; |
71 | | - Grid grid; |
72 | | - Block block; |
73 | | - }; |
74 | | - Key key; |
75 | | - Values values; |
76 | | -}; |
77 | | - |
78 | | -struct CudaCacheRetrievalResult { |
79 | | - std::string source; |
80 | | - std::string specializedName; |
81 | | - std::vector<int> parameters; |
82 | | - Grid grid; |
83 | | - Block block; |
84 | | -}; |
85 | | - |
86 | | -/** |
87 | | - * CudaCache stores the Cuda source of optimized kernels |
88 | | - * A CudaCache holds multiple CudaCachedEntry's. |
89 | | - * Each CudaCachedEntry is split to two conceptual parts the key and the values. |
90 | | - * The values are: |
91 | | - * the specialized (wrt inputs) Cuda source code, |
92 | | - * the kernel's specialized name, |
93 | | - * the kernel parameters, |
94 | | - * the Cuda block and grid dimensions |
95 | | - * The key is: |
96 | | - * the kernel/op's unique id (string), |
97 | | - * the specialized input dimensions, |
98 | | - * the isl options when the kernel was optimized, |
99 | | - * the target architecture (string), |
100 | | - * tc's version (string), |
101 | | - */ |
102 | | -class CudaCache : public Cache<CudaCache, CudaCachedEntry> { |
103 | | - public: |
104 | | - using ProtobufType = CudaCacheProto; |
105 | | - using CachedEntry = CudaCachedEntry; |
106 | | - using RetrievalResult = CudaCacheRetrievalResult; |
107 | | - static std::shared_ptr<CudaCache>& getGlobalSharedCache(); |
108 | | - |
109 | | - CudaCache() = default; |
110 | | - CudaCache(const CudaCacheProto& buf); |
111 | | - CudaCacheProto toProtobuf() const; |
112 | | - |
113 | | - /** |
114 | | - * If op was previously cached and the inputs' shape, isl options, and the |
115 | | - * target device are the same then this is a noop |
116 | | - * Else (cudaSource, grid, block) is stored in the cache |
117 | | - */ |
118 | | - void cacheKernel(CudaCachedEntry&& entry); |
119 | | - |
120 | | - /** |
121 | | - * Returns the cache entry that matches op (id, isl options, target device) |
122 | | - * and inputs' shapes. |
123 | | - */ |
124 | | - std::unique_ptr<CudaCacheRetrievalResult> retrieveKernel( |
125 | | - const std::string& id, |
126 | | - const CudaMappingOptions& options, |
127 | | - const std::vector<const DLTensor*>& inputs, |
128 | | - const std::vector<const DLTensor*>& outputs) const; |
129 | | - |
130 | | - void removeEntriesNotInOptionsCache(const OptionsCache& oc); |
131 | | -}; |
132 | | - |
133 | 37 | //////////////////////////////////////////////////////////////////////////////// |
134 | 38 | // OptionsCache |
135 | 39 | //////////////////////////////////////////////////////////////////////////////// |
@@ -231,6 +135,98 @@ class OptionsCache : public Cache<OptionsCache, OptionsCachedEntry> { |
231 | 135 | void keepOnlyBestCandidates(size_t numberToKeep); |
232 | 136 | }; |
233 | 137 |
|
////////////////////////////////////////////////////////////////////////////////
// CudaCache
////////////////////////////////////////////////////////////////////////////////
// A single CudaCache entry: the generated Cuda source and its launch
// configuration (Values), keyed by everything that determined that source
// (Key).  Convertible to/from CudaCacheEntryProto for (de)serialization.
struct CudaCachedEntry {
  // Builds an entry from the products of a compilation run.
  // id                    kernel/op unique identifier
  // kernelSpecializedName name of the input-specialized kernel
  // kernelParameters      scalar parameters the kernel was specialized with
  // grid, block           Cuda launch dimensions
  // mappingOptions        options the kernel was compiled with
  // inputs, outputs       tensors whose metadata (shape/type) forms the key
  // cudaSource            the specialized Cuda source code
  // deviceStr             target device identifier string
  CudaCachedEntry(
      const std::string& id,
      const std::string& kernelSpecializedName,
      const std::vector<int>& kernelParameters,
      const Grid& grid,
      const Block& block,
      const CudaMappingOptions& mappingOptions,
      const std::vector<const DLTensor*>& inputs,
      const std::vector<const DLTensor*>& outputs,
      const std::string& cudaSource,
      const std::string& deviceStr);

  // Deserializes an entry previously produced by toProtobuf().
  CudaCachedEntry(const CudaCacheEntryProto& buf);
  CudaCacheEntryProto toProtobuf() const;

  // Everything that uniquely identifies a compiled kernel.  Two compilations
  // with equal Keys are expected to produce the same Values.
  struct Key {
    std::string id; // kernel/op unique identifier
    CudaMappingOptions mappingOptions; // options used at compilation time
    std::vector<detail::TensorInfo> inputs; // input shapes/types
    std::vector<detail::TensorInfo> outputs; // output shapes/types
    std::string deviceStr; // target device identifier
    std::string gitVersion; // tc version that produced the entry
  };

  // The cached artifacts for a given Key.
  struct Values {
    std::string cudaSource; // input-specialized Cuda source
    std::string kernelSpecializedName;
    std::vector<int> kernelParameters;
    Grid grid; // launch grid dimensions
    Block block; // launch block dimensions
  };
  Key key;
  Values values;
};
| 176 | + |
// Value type returned by CudaCache::retrieveKernel: everything a caller needs
// to instantiate and launch the previously compiled kernel.
struct CudaCacheRetrievalResult {
  std::string source; // the specialized Cuda source
  std::string specializedName; // the specialized kernel name
  std::vector<int> parameters; // scalar kernel parameters
  Grid grid; // launch grid dimensions
  Block block; // launch block dimensions
};
| 184 | + |
| 185 | +/** |
| 186 | + * CudaCache stores the Cuda source of optimized kernels |
| 187 | + * A CudaCache holds multiple CudaCachedEntry's. |
| 188 | + * Each CudaCachedEntry is split to two conceptual parts the key and the values. |
| 189 | + * The values are: |
| 190 | + * the specialized (wrt inputs) Cuda source code, |
| 191 | + * the kernel's specialized name, |
| 192 | + * the kernel parameters, |
| 193 | + * the Cuda block and grid dimensions |
| 194 | + * The key is: |
| 195 | + * the kernel/op's unique id (string), |
| 196 | + * the specialized input dimensions, |
| 197 | + * the isl options when the kernel was optimized, |
| 198 | + * the target architecture (string), |
| 199 | + * tc's version (string), |
| 200 | + */ |
| 201 | +class CudaCache : public Cache<CudaCache, CudaCachedEntry> { |
| 202 | + public: |
| 203 | + typedef CudaCacheProto ProtobufType; |
| 204 | + static std::shared_ptr<CudaCache>& getGlobalSharedCache(); |
| 205 | + |
| 206 | + CudaCache() = default; |
| 207 | + CudaCache(const CudaCacheProto& buf); |
| 208 | + CudaCacheProto toProtobuf() const; |
| 209 | + |
| 210 | + /** |
| 211 | + * If op was previously cached and the inputs' shape, isl options, and the |
| 212 | + * target device are the same then this is a noop |
| 213 | + * Else (cudaSource, grid, block) is stored in the cache |
| 214 | + */ |
| 215 | + void cacheKernel(CudaCachedEntry&& entry); |
| 216 | + |
| 217 | + /** |
| 218 | + * Returns the cache entry that matches op (id, isl options, target device) |
| 219 | + * and inputs' shapes. |
| 220 | + */ |
| 221 | + std::unique_ptr<CudaCacheRetrievalResult> retrieveKernel( |
| 222 | + const std::string& id, |
| 223 | + const CudaMappingOptions& options, |
| 224 | + const std::vector<const DLTensor*>& inputs, |
| 225 | + const std::vector<const DLTensor*>& outputs) const; |
| 226 | + |
| 227 | + void removeEntriesNotInOptionsCache(const OptionsCache& oc); |
| 228 | +}; |
| 229 | + |
234 | 230 | //////////////////////////////////////////////////////////////////////////////// |
235 | 231 | // ManualCudaCache |
236 | 232 | //////////////////////////////////////////////////////////////////////////////// |
|
0 commit comments