@@ -400,8 +400,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
400400 template <typename T> requires nbl::is_any_of_v<T,std::conditional_t <std::is_same_v<BufferType,IGPUBuffer>,uint32_t ,BuildRangeInfo>,BuildRangeInfo>
401401 inline uint32_t valid (const T& buildRangeInfo) const
402402 {
403+ uint32_t retval = trackedBLASes.size ();
403404 if constexpr (std::is_same_v<T,uint32_t >)
404- return valid<BuildRangeInfo>({.instanceCount =buildRangeInfo,.instanceByteOffset =0 });
405+ retval += valid<BuildRangeInfo>({.instanceCount =buildRangeInfo,.instanceByteOffset =0 });
405406 else
406407 {
407408 if (IGPUAccelerationStructure::BuildInfo<BufferType>::invalid (srcAS,dstAS))
@@ -444,8 +445,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
444445 #endif
445446
446447 // destination, scratch and instanceData are required, source is optional
447- return Base::isUpdate ? 4u :3u ;
448+ retval += Base::isUpdate ? 4u :3u ;
448449 }
450+ return retval;
449451 }
450452
451453 inline core::smart_refctd_ptr<const IReferenceCounted>* fillTracking (core::smart_refctd_ptr<const IReferenceCounted>* oit) const
@@ -457,6 +459,9 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
457459
458460 *(oit++) = core::smart_refctd_ptr<const IReferenceCounted>(instanceData.buffer );
459461
462+ for (const auto & blas : trackedBLASes)
463+ *(oit++) = core::smart_refctd_ptr<const IReferenceCounted>(blas);
464+
460465 return oit;
461466 }
462467
@@ -470,6 +475,8 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
470475 // + an array of `PolymorphicInstance` if our `SCreationParams::flags.hasFlags(MOTION_BIT)`, otherwise
471476 // + an array of `StaticInstance`
472477 asset::SBufferBinding<const BufferType> instanceData = {};
478+ // [optional] Provide info about what BLAS references to hold onto after the build. For performance make sure the list is compact (without repeated elements).
479+ std::span<const IGPUBottomLevelAccelerationStructure*> trackedBLASes = {};
473480 };
474481 using DeviceBuildInfo = BuildInfo<IGPUBuffer>;
475482 using HostBuildInfo = BuildInfo<asset::ICPUBuffer>;
@@ -545,11 +552,71 @@ class IGPUTopLevelAccelerationStructure : public asset::ITopLevelAccelerationStr
545552 using HostPolymorphicInstance = PolymorphicInstance<IGPUBottomLevelAccelerationStructure::host_op_ref_t >;
546553 static_assert (sizeof (DevicePolymorphicInstance)==sizeof (HostPolymorphicInstance));
547554
555+ //
556+ using build_ver_t = uint32_t ;
557+ // this gets called when execution is sure to happen 100%, e.g. not during command recording but during submission
558+ inline build_ver_t registerNextBuildVer ()
559+ {
560+ return m_pendingBuildVer++;
561+ }
562+ //
563+ using blas_smart_ptr_t = core::smart_refctd_ptr<const IGPUBottomLevelAccelerationStructure>;
564+ // returns number of tracked BLASes if `tracked==nullptr` otherwise writes `*count` tracked BLASes from `first` into `*tracked`
565+ inline build_ver_t getTrackedBLASes (uint32_t * count, blas_smart_ptr_t * tracked, const uint32_t first=0 ) const
566+ {
567+ if (!count)
568+ return 0 ;
569+ // stop multiple threads messing with us
570+ std::lock_guard lk (m_trackingLock);
571+ const uint32_t toWrite = std::min<uint32_t >(std::max<uint32_t >(m_trackedBLASes.size (),first)-first,tracked ? (*count):0xffFFffFFu );
572+ *count = toWrite;
573+ if (tracked && toWrite)
574+ {
575+ auto it = m_trackedBLASes.begin ();
576+ // cmon its an unordered map, iterator should have operator +=
577+ for (auto i=0 ; i<first; i++)
578+ it++;
579+ for (auto i=0 ; i<toWrite; i++)
580+ *(tracked++) = *(it++);
581+ }
582+ return m_completedBuildVer;
583+ }
584+ // Useful if TLAS got built externally as well, returns if there were no later builds that preempted us setting the result here
585+ template <typename Iterator>
586+ inline bool setTrackedBLASes (const Iterator begin, const Iterator end, const build_ver_t buildVer)
587+ {
588+ // stop multiple threads messing with us
589+ std::lock_guard lk (m_trackingLock);
590+ // stop out of order callbacks
591+ if (buildVer<=m_completedBuildVer)
592+ return false ;
593+ m_completedBuildVer = buildVer;
594+ // release already tracked BLASes
595+ m_trackedBLASes.clear ();
596+ // sanity check, TODO: this should be an atomic_max on the `m_pendingBuildVer`
597+ if (m_completedBuildVer>m_pendingBuildVer)
598+ m_pendingBuildVer = m_completedBuildVer;
599+ // now fill the contents
600+ m_trackedBLASes.insert (begin,end);
601+ return true ;
602+ }
603+ // a little utility to make sure nothing from this build version and before gets tracked
604+ inline bool clearTrackedBLASes (const build_ver_t buildVer)
605+ {
606+ return setTrackedBLASes<const blas_smart_ptr_t *>(nullptr ,nullptr ,buildVer);
607+ }
608+
548609 protected:
549610 inline IGPUTopLevelAccelerationStructure (core::smart_refctd_ptr<const ILogicalDevice>&& dev, SCreationParams&& params)
550- : asset::ITopLevelAccelerationStructure<IGPUAccelerationStructure>(std::move(dev),std::move(params)), m_maxInstanceCount(params.maxInstanceCount) {}
611+ : asset::ITopLevelAccelerationStructure<IGPUAccelerationStructure>(std::move(dev),std::move(params)),
612+ m_maxInstanceCount(params.maxInstanceCount),m_trackedBLASes() {}
551613
552614 const uint32_t m_maxInstanceCount;
615+ // TODO: maybe replace with new readers/writers lock
616+ mutable std::mutex m_trackingLock;
617+ std::atomic<build_ver_t > m_pendingBuildVer = 0 ;
618+ build_ver_t m_completedBuildVer = 0 ;
619+ core::unordered_set<blas_smart_ptr_t > m_trackedBLASes;
553620};
554621
555622}
0 commit comments