test the bda::__ptr and __ref in some examples

devsh · devsh · commit ec6bdab4005a · 2025-03-07T17:36:05.000+01:00
Turns out that in many of the examples we can't use them, because of unspecialized templates and bitfields.
diff --git a/11_FFT/app_resources/shader.comp.hlsl b/11_FFT/app_resources/shader.comp.hlsl
@@ -43,6 +43,7 @@ struct Accessor
         return accessor;
     }
 
+	// TODO: can't use our own BDA yet, because it doesn't support the types `workgroup::FFT` will invoke these templates with
 	template <typename AccessType>
 	void get(const uint32_t index, NBL_REF_ARG(AccessType) value)
 	{
diff --git a/67_RayQueryGeometry/app_resources/common.hlsl b/67_RayQueryGeometry/app_resources/common.hlsl
@@ -5,6 +5,7 @@
 
 NBL_CONSTEXPR uint32_t WorkgroupSize = 16;
 
+// we need bitfield support in NBL_HLSL_DECLARE_STRUCT it seems
 struct SGeomInfo
 {
     uint64_t vertexBufferAddress;
diff --git a/67_RayQueryGeometry/app_resources/render.comp.hlsl b/67_RayQueryGeometry/app_resources/render.comp.hlsl
@@ -4,6 +4,7 @@
 
 #include "nbl/builtin/hlsl/glsl_compat/core.hlsl"
 #include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl"
+#include "nbl/builtin/hlsl/bda/__ptr.hlsl"
 
 using namespace nbl::hlsl;
 
@@ -24,36 +25,26 @@ float3 unpackNormals3x10(uint32_t v)
 
 float3 calculateSmoothNormals(int instID, int primID, SGeomInfo geom, float2 bary)
 {
-    uint idxOffset = primID * 3;
-
     const uint indexType = geom.indexType;
     const uint vertexStride = geom.vertexStride;
 
     const uint64_t vertexBufferAddress = geom.vertexBufferAddress;
     const uint64_t indexBufferAddress = geom.indexBufferAddress;
 
-    uint i0, i1, i2;
+    uint32_t3 indices;
     switch (indexType)
     {
         case 0: // EIT_16BIT
-        {
-            i0 = uint32_t(vk::RawBufferLoad<uint16_t>(indexBufferAddress + (idxOffset + 0) * sizeof(uint16_t), 2u));
-            i1 = uint32_t(vk::RawBufferLoad<uint16_t>(indexBufferAddress + (idxOffset + 1) * sizeof(uint16_t), 2u));
-            i2 = uint32_t(vk::RawBufferLoad<uint16_t>(indexBufferAddress + (idxOffset + 2) * sizeof(uint16_t), 2u));
-        }
-        break;
+            indices = uint32_t3((nbl::hlsl::bda::__ptr<uint16_t3>::create(indexBufferAddress)+primID).deref().load());
+            break;
         case 1: // EIT_32BIT
-        {
-            i0 = vk::RawBufferLoad<uint32_t>(indexBufferAddress + (idxOffset + 0) * sizeof(uint32_t));
-            i1 = vk::RawBufferLoad<uint32_t>(indexBufferAddress + (idxOffset + 1) * sizeof(uint32_t));
-            i2 = vk::RawBufferLoad<uint32_t>(indexBufferAddress + (idxOffset + 2) * sizeof(uint32_t));
-        }
-        break;
+            indices = uint32_t3((nbl::hlsl::bda::__ptr<uint32_t3>::create(indexBufferAddress)+primID).deref().load());
+            break;
         default:    // EIT_NONE
         {
-            i0 = idxOffset;
-            i1 = idxOffset + 1;
-            i2 = idxOffset + 2;
+            indices[0] = primID * 3;
+            indices[1] = indices[0] + 1;
+            indices[2] = indices[0] + 2;
         }
     }
 
@@ -62,9 +53,10 @@ float3 calculateSmoothNormals(int instID, int primID, SGeomInfo geom, float2 bar
     {
         case OT_CUBE:
         {
-            uint32_t v0 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + i0 * vertexStride, 2u);
-            uint32_t v1 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + i1 * vertexStride, 2u);
-            uint32_t v2 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + i2 * vertexStride, 2u);
+            // TODO: document why the alignment is 2 here and nowhere else? isnt the `vertexStride` aligned to more than 2 anyway?
+            uint32_t v0 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + indices[0] * vertexStride, 2u);
+            uint32_t v1 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + indices[1] * vertexStride, 2u);
+            uint32_t v2 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + indices[2] * vertexStride, 2u);
 
             n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz);
             n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz);
@@ -76,9 +68,9 @@ float3 calculateSmoothNormals(int instID, int primID, SGeomInfo geom, float2 bar
         case OT_ARROW:
         case OT_CONE:
         {
-            uint32_t v0 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + i0 * vertexStride);
-            uint32_t v1 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + i1 * vertexStride);
-            uint32_t v2 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + i2 * vertexStride);
+            uint32_t v0 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + indices[0] * vertexStride);
+            uint32_t v1 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + indices[1] * vertexStride);
+            uint32_t v2 = vk::RawBufferLoad<uint32_t>(vertexBufferAddress + indices[2] * vertexStride);
 
             n0 = normalize(unpackNormals3x10(v0));
             n1 = normalize(unpackNormals3x10(v1));
@@ -90,9 +82,9 @@ float3 calculateSmoothNormals(int instID, int primID, SGeomInfo geom, float2 bar
         case OT_ICOSPHERE:
         default:
         {
-            n0 = normalize(vk::RawBufferLoad<float3>(vertexBufferAddress + i0 * vertexStride));
-            n1 = normalize(vk::RawBufferLoad<float3>(vertexBufferAddress + i1 * vertexStride));
-            n2 = normalize(vk::RawBufferLoad<float3>(vertexBufferAddress + i2 * vertexStride));
+            n0 = normalize(vk::RawBufferLoad<float3>(vertexBufferAddress + indices[0] * vertexStride));
+            n1 = normalize(vk::RawBufferLoad<float3>(vertexBufferAddress + indices[1] * vertexStride));
+            n2 = normalize(vk::RawBufferLoad<float3>(vertexBufferAddress + indices[2] * vertexStride));
         }
     }
 
@@ -132,6 +124,7 @@ void main(uint32_t3 threadID : SV_DispatchThreadID)
         const int instID = spirv::rayQueryGetIntersectionInstanceIdKHR(query, true);
         const int primID = spirv::rayQueryGetIntersectionPrimitiveIndexKHR(query, true);
 
+        // TODO: candidate for `bda::__ptr<SGeomInfo>`
         const SGeomInfo geom = vk::RawBufferLoad<SGeomInfo>(pc.geometryInfoBuffer + instID * sizeof(SGeomInfo));
         
         float3 normals;
diff --git a/70_FLIPFluids/app_resources/compute/advectParticles.comp.hlsl b/70_FLIPFluids/app_resources/compute/advectParticles.comp.hlsl
@@ -16,21 +16,26 @@ cbuffer GridData
     SGridData gridData;
 };
 
+
 [[vk::binding(b_apVelField, s_ap)]] Texture3D<float> velocityField[3];
 [[vk::binding(b_apPrevVelField, s_ap)]] Texture3D<float> prevVelocityField[3];
 [[vk::binding(b_apVelSampler, s_ap)]] SamplerState velocityFieldSampler;
 
-// TODO: delta time push constant? (but then for CI need a commandline `-fixed-timestep=MS` and `-frames=N` option too)
+#include "nbl/builtin/hlsl/bda/__ptr.hlsl"
+using namespace nbl::hlsl;
 
+// TODO: delta time push constant? (but then for CI need a commandline `-fixed-timestep=MS` and `-frames=N` option too)
 [numthreads(WorkgroupSize, 1, 1)]
 void main(uint32_t3 ID : SV_DispatchThreadID)
 {
     uint32_t pid = ID.x;
     Particle p;
 
-    int offset = sizeof(float32_t3) * pid;
-    p.position = vk::RawBufferLoad<float32_t3>(pc.particlePosAddress + offset);
-    p.velocity = vk::RawBufferLoad<float32_t3>(pc.particleVelAddress + offset);
+    // use a restrict reference for speed
+    bda::__ref<float32_t3,4,true> rPosition = (bda::__ptr<float32_t3>::create(pc.particlePosAddress)+pid).deref<4,true>();
+    bda::__ref<float32_t3,4,true> rVelocity = (bda::__ptr<float32_t3>::create(pc.particleVelAddress)+pid).deref<4,true>();
+    p.position = rPosition.load();
+    p.velocity = rVelocity.load();
 
     // advect velocity
     float3 gridPrevVel = sampleVelocityAt(p.position, prevVelocityField, velocityFieldSampler, gridData);
@@ -52,6 +57,6 @@ void main(uint32_t3 ID : SV_DispatchThreadID)
 
     p.position = clampPosition(p.position, gridData.worldMin, gridData.worldMax);
 
-    vk::RawBufferStore<float32_t3>(pc.particlePosAddress + offset, p.position);
-    vk::RawBufferStore<float32_t3>(pc.particleVelAddress + offset, p.velocity);
+    rPosition.store(p.position);
+    rVelocity.store(p.velocity);
 }
diff --git a/70_FLIPFluids/app_resources/compute/genParticleVertices.comp.hlsl b/70_FLIPFluids/app_resources/compute/genParticleVertices.comp.hlsl
@@ -53,6 +53,9 @@ static const float2 quadUVs[4] = {
     float2(1, 1)
 };
 
+#include "nbl/builtin/hlsl/bda/__ptr.hlsl"
+using namespace nbl::hlsl;
+
 [numthreads(WorkgroupSize, 1, 1)]
 void main(uint32_t3 ID : SV_DispatchThreadID)
 {
@@ -112,6 +115,6 @@ void main(uint32_t3 ID : SV_DispatchThreadID)
 
         vertex.uv = quadUVs[vertexOrder[i]];
 
-        vk::RawBufferStore<VertexInfo>(pc.particleVerticesAddress + sizeof(VertexInfo) * (quadBeginIdx + i), vertex);
+        (bda::__ptr<VertexInfo>::create(pc.particleVerticesAddress)+(quadBeginIdx+i)).deref_restrict().store(vertex);
     }
 }
diff --git a/70_FLIPFluids/app_resources/fluidParticles.vertex.hlsl b/70_FLIPFluids/app_resources/fluidParticles.vertex.hlsl
@@ -10,11 +10,15 @@ struct SPushConstants
 
 [[vk::push_constant]] SPushConstants pc;
 
+
+#include "nbl/builtin/hlsl/bda/__ptr.hlsl"
+using namespace nbl::hlsl;
+
 PSInput main(uint vertexID : SV_VertexID)
 {
     PSInput output;
 
-    VertexInfo vertex = vk::RawBufferLoad<VertexInfo>(pc.particleVerticesAddress + sizeof(VertexInfo) * vertexID);
+    VertexInfo vertex = (bda::__ptr<VertexInfo>::create(pc.particleVerticesAddress)+vertexID).deref_restrict().load();
 
     output.position = vertex.position;
     output.vsSpherePos = vertex.vsSpherePos.xyz;
diff --git a/70_FLIPFluids/app_resources/render_common.hlsl b/70_FLIPFluids/app_resources/render_common.hlsl
@@ -1,5 +1,6 @@
 #ifndef _FLIP_EXAMPLE_RENDER_COMMON_HLSL
 #define _FLIP_EXAMPLE_RENDER_COMMON_HLSL
+#include "nbl/builtin/hlsl/bda/struct_declare.hlsl"
 
 struct SParticleRenderParams
 {
@@ -9,16 +10,15 @@ struct SParticleRenderParams
 };
 
 // TODO: This struct shouldn't exist if there's no "vertex generation" shader
-struct VertexInfo
-{
-    // TODO: don't use 4D vectors for 3D quantities
-    float32_t4 position;
-	float32_t4 vsSpherePos;
-    float32_t radius;
-
-    float32_t4 color;
-    float32_t2 uv;
-};
+struct VertexInfo;
+// TODO: don't use 4D vectors for 3D quantities
+NBL_HLSL_DEFINE_STRUCT((VertexInfo),
+    ((position, float32_t4))
+    ((vsSpherePos, float32_t4))
+    ((radius, float32_t))
+    ((color, float32_t4))
+    ((uv, float32_t2))
+);
 
 #ifdef __HLSL_VERSION
 struct PSInput

Original file line number	Diff line number	Diff line change
`@@ -43,6 +43,7 @@ struct Accessor`
`43`	`43`	`return accessor;`
`44`	`44`	`}`
`45`	`45`
	`46`	+ // TODO: can't use our own BDA yet, because it doesn't support the types `workgroup::FFT` will invoke these templates with
`46`	`47`	`template <typename AccessType>`
`47`	`48`	`void get(const uint32_t index, NBL_REF_ARG(AccessType) value)`
`48`	`49`	`{`
Original file line number	Diff line number	Diff line change
`@@ -5,6 +5,7 @@`
`5`	`5`
`6`	`6`	`NBL_CONSTEXPR uint32_t WorkgroupSize = 16;`
`7`	`7`
	`8`	`+// we need bitfield support in NBL_HLSL_DECLARE_STRUCT it seems`
`8`	`9`	`struct SGeomInfo`
`9`	`10`	`{`
`10`	`11`	`uint64_t vertexBufferAddress;`
Original file line number	Diff line number	Diff line change
`@@ -53,6 +53,9 @@ static const float2 quadUVs[4] = {`
`53`	`53`	`float2(1, 1)`
`54`	`54`	`};`
`55`	`55`
	`56`	`+#include "nbl/builtin/hlsl/bda/__ptr.hlsl"`
	`57`	`+using namespace nbl::hlsl;`
	`58`	`+`
`56`	`59`	`[numthreads(WorkgroupSize, 1, 1)]`
`57`	`60`	`void main(uint32_t3 ID : SV_DispatchThreadID)`
`58`	`61`	`{`
`@@ -112,6 +115,6 @@ void main(uint32_t3 ID : SV_DispatchThreadID)`
`112`	`115`
`113`	`116`	`vertex.uv = quadUVs[vertexOrder[i]];`
`114`	`117`
`115`		`- vk::RawBufferStore<VertexInfo>(pc.particleVerticesAddress + sizeof(VertexInfo) * (quadBeginIdx + i), vertex);`
	`118`	`+ (bda::__ptr<VertexInfo>::create(pc.particleVerticesAddress)+(quadBeginIdx+i)).deref_restrict().store(vertex);`
`116`	`119`	`}`
`117`	`120`	`}`