@@ -395,6 +395,59 @@ namespace hlsl
395395 return bit_cast<this_t>(data ^ ieee754::traits<float64_t>::signMask);
396396 }
397397
398+ /**
399+ * @brief Computes sqare root estimation.
400+ *
401+ * Can be less precise when FastMath is disabled.
402+ * sqrt(inf) = inf
403+ * sqrt(-0) = -0
404+ * sqrt(NaN) = NaN
405+ */
406+ static this_t sqrt (this_t number)
407+ {
408+ bool isZero = !(number.data & 0x7FFFFFFFFFFFFFFFull);
409+ if (isZero)
410+ return number;
411+
412+ static const uint64_t MaxFloat64AsUint64 = 0x7FEFFFFFFFFFFFFFull;
413+ if (number.data > MaxFloat64AsUint64)
414+ {
415+ bool isInf = cpp_compat_intrinsics_impl::isinf_uint_impl (number.data);
416+ if (isInf)
417+ return number;
418+
419+ // when (number.data > MaxFloat64AsUint64) and is not infinity, we can be sure that number is either NaN or negative
420+ return bit_cast<this_t>(ieee754::traits<this_t>::quietNaN);
421+ }
422+
423+ const float f32InverseSquareRoot = nbl::hlsl::rsqrt (_static_cast<float >(number));
424+
425+ // find sqrt approximation using the Newton-Raphson method
426+ this_t inverseSquareRoot = _static_cast<this_t>(f32InverseSquareRoot);
427+ const int Iterations = 5 ;
428+ static const this_t Half = this_t::create (0.5f );
429+ static const this_t ThreeHalfs = this_t::create (1.5f );
430+ const this_t x2 = number * Half;
431+ [[unroll]]
432+ for (int i = 0 ; i < Iterations; ++i)
433+ {
434+ inverseSquareRoot = inverseSquareRoot * (ThreeHalfs - (x2 * inverseSquareRoot * inverseSquareRoot));
435+ }
436+
437+ if (FastMath)
438+ {
439+ return this_t::create (1.0f ) / inverseSquareRoot;
440+ }
441+ else
442+ {
443+ // 2 Newton-Raphson iterations to increase precision
444+ this_t squareRoot = this_t::create (1.0f ) / inverseSquareRoot;
445+ squareRoot = Half * (squareRoot + number / squareRoot);
446+ squareRoot = Half * (squareRoot + number / squareRoot);
447+ return squareRoot;
448+ }
449+ }
450+
398451 NBL_CONSTEXPR_STATIC bool isFastMathSupported = FastMath;
399452 };
400453
0 commit comments