@@ -380,26 +380,28 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
380380 template <uint32_t dimensions, E_FORMAT CacheFormat>
381381 value_type_t <CacheFormat> quantize (const hlsl::vector<hlsl::float32_t , dimensions>& value)
382382 {
383- auto to_float32_t4 = [](hlsl::vector<hlsl::float32_t , dimensions> src) -> hlsl::float32_t4
383+ using float32_tN = hlsl::vector<hlsl::float32_t , dimensions>;
384+
385+ auto to_vec_t4 = []<typename T>(hlsl::vector<T, dimensions> src, T padValue) -> hlsl::vector<T, 4 >
384386 {
385387 if constexpr (dimensions == 1 )
386388 {
387- return {src.x , 0 , 0 , 0 };
389+ return {src.x , padValue, padValue, padValue };
388390 } else if constexpr (dimensions == 2 )
389391 {
390- return {src.x , src.y , 0 , 0 };
392+ return {src.x , src.y , padValue, padValue };
391393 } else if constexpr (dimensions == 3 )
392394 {
393- return {src.x , src.y , src.z , 0 };
395+ return {src.x , src.y , src.z , padValue };
394396 } else if constexpr (dimensions == 4 )
395397 {
396398 return {src.x , src.y , src.z , src.w };
397399 }
398400 };
399401
400- const auto negativeMask = to_float32_t4 (lessThan (value, hlsl::vector<hlsl:: float32_t , dimensions> (0 .0f )));
402+ const auto negativeMask = to_vec_t4 (lessThan (value, float32_tN (0 .0f )), false );
401403
402- const hlsl::vector<hlsl:: float32_t , dimensions> absValue = abs (value);
404+ const float32_tN absValue = abs (value);
403405 const auto key = Key (absValue);
404406
405407 constexpr auto quantizationBits = quantization_bits_v<CacheFormat>;
@@ -413,14 +415,14 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
413415 {
414416 const auto fit = findBestFit<dimensions,quantizationBits>(absValue);
415417
416- const auto abs_fit = to_float32_t4 (abs (fit));
418+ const auto abs_fit = to_vec_t4 (abs (fit), 0 . f );
417419 quantized = hlsl::uint32_t4 (abs_fit.x , abs_fit.y , abs_fit.z , abs_fit.w );
418420
419421 insertIntoCache<CacheFormat>(key,quantized);
420422 }
421423 }
422424
423- auto switch_vec = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
425+ auto select = [](hlsl::uint32_t4 val1, hlsl::uint32_t4 val2, hlsl::bool4 mask)
424426 {
425427 hlsl::uint32_t4 retval;
426428 retval.x = mask.x ? val2.x : val1.x ;
@@ -435,25 +437,26 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
435437
436438 // for a positive number, XORing with 0 keeps its value
437439 // for a negative number, we XOR with all ones, which flips the bits, and then we add one later. Flipping the bits and then adding one turns a positive number into its negative
438- auto restoredAsVec = quantized.getValue () ^ switch_vec (hlsl::uint32_t4 (0u ), hlsl::uint32_t4 (xorflag), negativeMask);
439- restoredAsVec += switch_vec (hlsl::uint32_t4 (0u ), hlsl::uint32_t4 (1u ), negativeMask);
440+ auto restoredAsVec = quantized.getValue () ^ select (hlsl::uint32_t4 (0u ), hlsl::uint32_t4 (xorflag), negativeMask);
441+ restoredAsVec += (hlsl::uint32_t4 (1u ) & hlsl::uint32_t4 (negativeMask) );
440442
441443 return value_type_t <CacheFormat>(restoredAsVec);
442444 }
443445
444446 template <uint32_t dimensions, uint32_t quantizationBits>
445447 static inline hlsl::vector<hlsl::float32_t , dimensions> findBestFit (const hlsl::vector<hlsl::float32_t , dimensions>& value)
446448 {
449+ using float32_tN = hlsl::vector<hlsl::float32_t , dimensions>;
447450 static_assert (dimensions>1u ," No point" );
448451 static_assert (dimensions<=4u ," High Dimensions are Hard!" );
449452
450453 const auto vectorForDots = hlsl::normalize (value);
451454
452455 //
453- hlsl::vector<hlsl:: float32_t , dimensions> fittingVector;
454- hlsl::vector<hlsl:: float32_t , dimensions> floorOffset = {};
456+ float32_tN fittingVector;
457+ float32_tN floorOffset = {};
455458 constexpr uint32_t cornerCount = (0x1u <<(dimensions-1u ))-1u ;
456- hlsl::vector<hlsl:: float32_t , dimensions> corners[cornerCount] = {};
459+ float32_tN corners[cornerCount] = {};
457460 {
458461 uint32_t maxDirCompIndex = 0u ;
459462 for (auto i=1u ; i<dimensions; i++)
@@ -465,7 +468,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
465468 if (maxDirectionComp < std::sqrtf (0 .9998f / float (dimensions)))
466469 {
467470 _NBL_DEBUG_BREAK_IF (true );
468- return hlsl::vector<hlsl:: float32_t , dimensions> (0 .f );
471+ return float32_tN (0 .f );
469472 }
470473 fittingVector = value / maxDirectionComp;
471474 floorOffset[maxDirCompIndex] = 0 .499f ;
@@ -487,9 +490,9 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
487490 }
488491 }
489492
490- hlsl::vector<hlsl:: float32_t , dimensions> bestFit;
493+ float32_tN bestFit;
491494 float closestTo1 = -1 .f ;
492- auto evaluateFit = [&](const hlsl::vector<hlsl:: float32_t , dimensions> & newFit) -> void
495+ auto evaluateFit = [&](const float32_tN & newFit) -> void
493496 {
494497 auto newFitLen = length (newFit);
495498 const float dp = hlsl::dot (newFit,vectorForDots) / (newFitLen);
@@ -502,7 +505,7 @@ class CDirQuantCacheBase : public virtual core::IReferenceCounted, public impl::
502505
503506 constexpr uint32_t cubeHalfSize = (0x1u << quantizationBits) - 1u ;
504507 const auto test = core::vectorSIMDf (cubeHalfSize);
505- const hlsl::vector<hlsl:: float32_t , dimensions> cubeHalfSizeND = hlsl::vector<hlsl:: float32_t , dimensions> (cubeHalfSize);
508+ const float32_tN cubeHalfSizeND = float32_tN (cubeHalfSize);
506509 for (uint32_t n=cubeHalfSize; n>0u ; n--)
507510 {
508511 // we'd use float addition in the interest of speed, to increment the loop
0 commit comments