@@ -141,152 +141,6 @@ CachedEntryType* searchKernel(
141141}
142142} // namespace
143143
144- // //////////////////////////////////////////////////////////////////////////////
145- // CudaCache
146- // //////////////////////////////////////////////////////////////////////////////
147- std::shared_ptr<CudaCache>& CudaCache::getGlobalSharedCache () {
148- static std::shared_ptr<CudaCache> cudaCache_;
149- return cudaCache_;
150- }
151-
152- CudaCachedEntry::CudaCachedEntry (
153- const std::string& id,
154- const std::string& kernelSpecializedName,
155- const std::vector<int >& kernelParameters,
156- const Grid& grid,
157- const Block& block,
158- const CudaMappingOptions& mappingOptions,
159- const std::vector<const DLTensor*>& inputs,
160- const std::vector<const DLTensor*>& outputs,
161- const std::string& cudaSource,
162- const std::string& deviceStr)
163- : key{id,
164- mappingOptions,
165- DLTensorToTensorInfoVector (inputs),
166- DLTensorToTensorInfoVector (outputs),
167- deviceStr,
168- git_version},
169- values{cudaSource, kernelSpecializedName, kernelParameters, grid, block} {
170- }
171-
172- CudaCachedEntry::CudaCachedEntry (const CudaCacheEntryProto& buf)
173- : key{buf.id (),
174- CudaMappingOptions{buf.kernel_options ()},
175- ProtoToTensorInfoVector (buf.inputs ()),
176- ProtoToTensorInfoVector (buf.outputs ()),
177- buf.device_str (),
178- buf.git_version ()},
179- values{buf.cuda_source (),
180- buf.specialized_name (),
181- std::vector<int >{buf.parameters ().begin (), buf.parameters ().end ()},
182- Grid (buf.grid_dims ()),
183- Block (buf.block_dims ())} {}
184-
185- CudaCache::CudaCache (const CudaCacheProto& buf) {
186- entries_.reserve (buf.entries_size ());
187- for (const auto & entry_buf : buf.entries ())
188- entries_.emplace_back (entry_buf);
189- }
190-
191- void CudaCache::cacheKernel (CudaCachedEntry&& entry) {
192- std::lock_guard<std::mutex> lock (mtx_);
193- ++numberCacheAttemps;
194- auto retrievedEntry = searchKernel (
195- entries_,
196- entry.key .id ,
197- entry.key .mappingOptions ,
198- entry.key .inputs ,
199- entry.key .outputs );
200- if (retrievedEntry) {
201- if (retrievedEntry->values .cudaSource != entry.values .cudaSource or
202- retrievedEntry->values .grid != entry.values .grid or
203- retrievedEntry->values .block != entry.values .block ) {
204- throw CacheEntrySameKeyDifferentValue (
205- " CudaCache::CacheKernel: a kernel matching the id, options and "
206- " inputs was previously cached with different cuda source or block "
207- " or grid dimensions." );
208- }
209- return ;
210- }
211- entries_.emplace_back (std::move (entry));
212- }
213-
214- std::unique_ptr<CudaCacheRetrievalResult> CudaCache::retrieveKernel (
215- const std::string& id,
216- const CudaMappingOptions& options,
217- const std::vector<const DLTensor*>& inputs,
218- const std::vector<const DLTensor*>& outputs) const {
219- std::lock_guard<std::mutex> lock (mtx_);
220- ++numberAttemptedRetrievals;
221- auto entry = searchKernel (entries_, id, options, inputs, outputs);
222- if (not entry) {
223- return nullptr ;
224- }
225- ++numberSuccessfulRetrievals;
226- return std::unique_ptr<CudaCacheRetrievalResult>(
227- new CudaCacheRetrievalResult{entry->values .cudaSource ,
228- entry->values .kernelSpecializedName ,
229- entry->values .kernelParameters ,
230- entry->values .grid ,
231- entry->values .block });
232- }
233-
234- void CudaCache::removeEntriesNotInOptionsCache (const OptionsCache& oc) {
235- std::vector<CudaCachedEntry> newEntries;
236- for (const auto & entry : oc) {
237- for (const auto & options : entry.values ) {
238- auto cudaEntry = searchKernel (
239- entries_,
240- entry.key .id ,
241- options.mappingOptions ,
242- entry.key .inputs ,
243- entry.key .outputs );
244- if (cudaEntry) {
245- newEntries.push_back (std::move (*cudaEntry));
246- }
247- }
248- }
249- entries_ = std::move (newEntries);
250- }
251-
252- CudaCacheProto CudaCache::toProtobuf () const {
253- CudaCacheProto buf;
254- auto * entriesBuf = buf.mutable_entries ();
255- entriesBuf->Reserve (entries_.size ());
256- std::transform (
257- entries_.begin (),
258- entries_.end (),
259- google::protobuf::RepeatedPtrFieldBackInserter (entriesBuf),
260- [](const CudaCachedEntry& entry) { return entry.toProtobuf (); });
261- return buf;
262- }
263-
264- CudaCacheEntryProto CudaCachedEntry::toProtobuf () const {
265- CudaCacheEntryProto buf;
266- buf.set_id (key.id );
267- *buf.mutable_kernel_options () = key.mappingOptions .proto ();
268- std::transform (
269- key.inputs .begin (),
270- key.inputs .end (),
271- google::protobuf::RepeatedPtrFieldBackInserter (buf.mutable_inputs ()),
272- [](const detail::TensorInfo& input) { return input.toProtobuf (); });
273- std::transform (
274- key.outputs .begin (),
275- key.outputs .end (),
276- google::protobuf::RepeatedPtrFieldBackInserter (buf.mutable_outputs ()),
277- [](const detail::TensorInfo& output) { return output.toProtobuf (); });
278- buf.set_device_str (key.deviceStr );
279- buf.set_git_version (key.gitVersion );
280-
281- buf.set_cuda_source (values.cudaSource );
282- *buf.mutable_grid_dims () = values.grid .view .proto ;
283- *buf.mutable_block_dims () = values.block .view .proto ;
284- buf.set_specialized_name (values.kernelSpecializedName );
285- WriteProtobufArray (values.kernelParameters , buf.mutable_parameters ());
286-
287- return buf;
288- }
289-
290144// //////////////////////////////////////////////////////////////////////////////
291145// OptionsCache
292146// //////////////////////////////////////////////////////////////////////////////
@@ -566,66 +420,4 @@ void OptionsCache::keepOnlyBestCandidates(size_t numberToKeep) {
566420 }
567421 }
568422}
569-
570- // //////////////////////////////////////////////////////////////////////////////
571- // ManualCudaCache
572- // //////////////////////////////////////////////////////////////////////////////
573- std::shared_ptr<ManualCudaCache>& ManualCudaCache::getGlobalSharedCache () {
574- static std::shared_ptr<ManualCudaCache> manualCudaCache_;
575- return manualCudaCache_;
576- }
577-
578- ManualCudaCachedEntry::ManualCudaCachedEntry (
579- const std::string& id,
580- const std::string& kernelSpecializedName,
581- const std::vector<int >& kernelParameters,
582- const Grid& grid,
583- const Block& block,
584- const std::vector<const DLTensor*>& inputs,
585- const std::vector<const DLTensor*>& outputs,
586- const std::string& cudaSource,
587- const std::string& deviceStr)
588- : key{id,
589- DLTensorToTensorInfoVector (inputs),
590- DLTensorToTensorInfoVector (outputs),
591- deviceStr,
592- git_version},
593- values{cudaSource, kernelSpecializedName, kernelParameters, grid, block} {
594- }
595-
596- void ManualCudaCache::cacheKernel (ManualCudaCachedEntry&& entry) {
597- std::lock_guard<std::mutex> lock (mtx_);
598- ++numberCacheAttemps;
599- auto retrievedEntry =
600- searchKernel (entries_, entry.key .id , entry.key .inputs , entry.key .outputs );
601- if (retrievedEntry) {
602- retrievedEntry->values .grid = entry.values .grid ;
603- retrievedEntry->values .block = entry.values .block ;
604- retrievedEntry->values .cudaSource = entry.values .cudaSource ;
605- retrievedEntry->values .kernelSpecializedName =
606- entry.values .kernelSpecializedName ;
607- retrievedEntry->values .kernelParameters = entry.values .kernelParameters ;
608- return ;
609- }
610- entries_.emplace_back (std::move (entry));
611- }
612-
613- std::unique_ptr<ManualCudaCacheRetrievalResult> ManualCudaCache::retrieveKernel (
614- const std::string& id,
615- const std::vector<const DLTensor*>& inputs,
616- const std::vector<const DLTensor*>& outputs) const {
617- std::lock_guard<std::mutex> lock (mtx_);
618- ++numberAttemptedRetrievals;
619- auto entry = searchKernel (entries_, id, inputs, outputs);
620- if (not entry) {
621- return nullptr ;
622- }
623- ++numberSuccessfulRetrievals;
624- return std::unique_ptr<ManualCudaCacheRetrievalResult>(
625- new ManualCudaCacheRetrievalResult{entry->values .cudaSource ,
626- entry->values .kernelSpecializedName ,
627- entry->values .kernelParameters ,
628- entry->values .grid ,
629- entry->values .block });
630- }
631423} // namespace tc
0 commit comments