From 4890bcb5cfc72d6703db49c4f5450516a6ef830a Mon Sep 17 00:00:00 2001 From: KirillAlekseenko Date: Fri, 6 Dec 2024 16:05:16 +0300 Subject: [PATCH 1/4] possible naive RCAS negative output fix --- sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h index 82ebf21f..6521d902 100644 --- a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h +++ b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h @@ -762,6 +762,9 @@ void FsrEasuH( // Apply noise removal. #ifdef FSR_RCAS_DENOISE lobe *= nz; +#endif +#ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION + lobe *= ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL)); #endif // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0)); From 1ab2910f79df37e530a86345aca5f634dafd0928 Mon Sep 17 00:00:00 2001 From: KirillAlekseenko Date: Fri, 6 Dec 2024 16:24:08 +0300 Subject: [PATCH 2/4] added define in ffx_fsr1_rcas.h for testing --- sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h index 2c26606b..f52aae17 100644 --- a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h +++ b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h @@ -22,6 +22,7 @@ #define GROUP_SIZE 8 #define FSR_RCAS_DENOISE 1 +#define FSR_RCAS_LOWER_LIMITER_COMPENSATION 1 #include "ffx_core.h" From 25d6935a0fe5d44e0d70aa6b3ba9ff4c0cc50d34 Mon Sep 17 00:00:00 2001 From: KirillAlekseenko Date: Fri, 6 Dec 2024 16:28:56 +0300 Subject: [PATCH 3/4] define for fsr3 rcas --- sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h b/sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h index f48cffb9..d394315f 100644 --- a/sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h +++ b/sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h @@ -22,6 +22,7 @@ #define GROUP_SIZE 8 #define FSR_RCAS_DENOISE 1 +#define FSR_RCAS_LOWER_LIMITER_COMPENSATION 1 #include "ffx_core.h" From 47009184aa27cad605355a5ca50a7ab6fe72cd21 Mon Sep 17 00:00:00 2001 From: KirillAlekseenko Date: Fri, 6 Dec 2024 18:06:25 +0300 Subject: [PATCH 4/4] correction is applied only to lower limit --- sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h | 36 ++++++++++++++-------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h index 6521d902..c3edcfee 100644 --- a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h +++ b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h @@ -748,9 +748,14 @@ void FsrEasuH( // Immediate constants for peak range. FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0); // Limiters, these need to be high precision RCPs. - FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R); - FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G); - FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B); + #ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION + const FfxFloat32 lowerLimiterMultiplier = ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL)); + #else + const FfxFloat32 lowerLimiterMultiplier = 1.f; + #endif + FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R) * lowerLimiterMultiplier; + FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G) * lowerLimiterMultiplier; + FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B) * lowerLimiterMultiplier; FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y); FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y); FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y); @@ -762,9 +767,6 @@ void FsrEasuH( // Apply noise removal. #ifdef FSR_RCAS_DENOISE lobe *= nz; -#endif -#ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION - lobe *= ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL)); #endif // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes. FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0)); @@ -851,9 +853,14 @@ void FsrEasuH( // Immediate constants for peak range. FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); // Limiters, these need to be high precision RCPs. - FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R); - FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G); - FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B); + #ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION + const FfxFloat16 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL)); + #else + const FfxFloat16 lowerLimiterMultiplier = 1.f; + #endif + FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R)*lowerLimiterMultiplier; + FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G)*lowerLimiterMultiplier; + FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B)*lowerLimiterMultiplier; FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y); FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y); FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y); @@ -970,9 +977,14 @@ void FsrEasuH( // Immediate constants for peak range. FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0); // Limiters, these need to be high precision RCPs. - FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R); - FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G); - FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B); + #ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION + const FfxFloat16x2 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL)); + #else + const FfxFloat16x2 lowerLimiterMultiplier = 1.f; + #endif + FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R)*lowerLimiterMultiplier; + FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G)*lowerLimiterMultiplier; + FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B)*lowerLimiterMultiplier; FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y); FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y); FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);