From 4890bcb5cfc72d6703db49c4f5450516a6ef830a Mon Sep 17 00:00:00 2001
From: KirillAlekseenko <kir967960@yandex.ru>
Date: Fri, 6 Dec 2024 16:05:16 +0300
Subject: [PATCH 1/4] possible naive RCAS negative output fix

---
 sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h
index 82ebf21f..6521d902 100644
--- a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h
+++ b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h
@@ -762,6 +762,9 @@ void FsrEasuH(
  // Apply noise removal.
 #ifdef FSR_RCAS_DENOISE
      lobe *= nz;
+#endif
+#ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION
+     lobe *= ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL)); 
 #endif
      // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
      FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0));

From 1ab2910f79df37e530a86345aca5f634dafd0928 Mon Sep 17 00:00:00 2001
From: KirillAlekseenko <kir967960@yandex.ru>
Date: Fri, 6 Dec 2024 16:24:08 +0300
Subject: [PATCH 2/4] added define in ffx_fsr1_rcas.h for testing

---
 sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h
index 2c26606b..f52aae17 100644
--- a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h
+++ b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1_rcas.h
@@ -22,6 +22,7 @@
 
 #define GROUP_SIZE  8
 #define FSR_RCAS_DENOISE 1
+#define FSR_RCAS_LOWER_LIMITER_COMPENSATION 1
 
 #include "ffx_core.h"
 

From 25d6935a0fe5d44e0d70aa6b3ba9ff4c0cc50d34 Mon Sep 17 00:00:00 2001
From: KirillAlekseenko <kir967960@yandex.ru>
Date: Fri, 6 Dec 2024 16:28:56 +0300
Subject: [PATCH 3/4] define for fsr3 rcas

---
 sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h b/sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h
index f48cffb9..d394315f 100644
--- a/sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h
+++ b/sdk/include/FidelityFX/gpu/fsr3upscaler/ffx_fsr3upscaler_rcas.h
@@ -22,6 +22,7 @@
 
 #define GROUP_SIZE  8
 #define FSR_RCAS_DENOISE 1
+#define FSR_RCAS_LOWER_LIMITER_COMPENSATION 1
 
 #include "ffx_core.h"
 

From 47009184aa27cad605355a5ca50a7ab6fe72cd21 Mon Sep 17 00:00:00 2001
From: KirillAlekseenko <kir967960@yandex.ru>
Date: Fri, 6 Dec 2024 18:06:25 +0300
Subject: [PATCH 4/4] correction is applied only to lower limit

---
 sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h | 36 ++++++++++++++--------
 1 file changed, 24 insertions(+), 12 deletions(-)

diff --git a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h
index 6521d902..c3edcfee 100644
--- a/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h
+++ b/sdk/include/FidelityFX/gpu/fsr1/ffx_fsr1.h
@@ -748,9 +748,14 @@ void FsrEasuH(
      // Immediate constants for peak range.
      FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
      // Limiters, these need to be high precision RCPs.
-     FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R);
-     FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G);
-     FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B);
+     #ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION
+      const FfxFloat32 lowerLimiterMultiplier = ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL)); 
+     #else
+      const FfxFloat32 lowerLimiterMultiplier = 1.f;
+     #endif
+     FfxFloat32 hitMinR = mn4R * ffxReciprocal(FfxFloat32(4.0) * mx4R) * lowerLimiterMultiplier;
+     FfxFloat32 hitMinG = mn4G * ffxReciprocal(FfxFloat32(4.0) * mx4G) * lowerLimiterMultiplier;
+     FfxFloat32 hitMinB = mn4B * ffxReciprocal(FfxFloat32(4.0) * mx4B) * lowerLimiterMultiplier;
      FfxFloat32 hitMaxR = (peakC.x - mx4R) * ffxReciprocal(FfxFloat32(4.0) * mn4R + peakC.y);
      FfxFloat32 hitMaxG = (peakC.x - mx4G) * ffxReciprocal(FfxFloat32(4.0) * mn4G + peakC.y);
      FfxFloat32 hitMaxB = (peakC.x - mx4B) * ffxReciprocal(FfxFloat32(4.0) * mn4B + peakC.y);
@@ -762,9 +767,6 @@ void FsrEasuH(
  // Apply noise removal.
 #ifdef FSR_RCAS_DENOISE
      lobe *= nz;
-#endif
-#ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION
-     lobe *= ffxSaturate(eL / ffxMin(ffxMin3(bL, dL, fL), hL)); 
 #endif
      // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
      FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0));
@@ -851,9 +853,14 @@ void FsrEasuH(
   // Immediate constants for peak range.
   FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
   // Limiters, these need to be high precision RCPs.
-  FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
-  FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
-  FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
+  #ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION
+   const FfxFloat16 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL)); 
+  #else
+   const FfxFloat16 lowerLimiterMultiplier = 1.f;
+  #endif
+  FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R)*lowerLimiterMultiplier;
+  FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G)*lowerLimiterMultiplier;
+  FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B)*lowerLimiterMultiplier;
   FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
   FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
   FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
@@ -970,9 +977,14 @@ void FsrEasuH(
   // Immediate constants for peak range.
   FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
   // Limiters, these need to be high precision RCPs.
-  FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
-  FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
-  FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
+  #ifdef FSR_RCAS_LOWER_LIMITER_COMPENSATION
+   const FfxFloat16x2 lowerLimiterMultiplier = ffxSaturate(eL / min(ffxMin3Half(bL, dL, fL), hL)); 
+  #else
+   const FfxFloat16x2 lowerLimiterMultiplier = 1.f;
+  #endif
+  FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R)*lowerLimiterMultiplier;
+  FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G)*lowerLimiterMultiplier;
+  FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B)*lowerLimiterMultiplier;
   FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
   FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
   FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);