From da9305f1f576bd18bd3a62e14113e94d793515b0 Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Wed, 10 Apr 2024 18:53:08 +0200 Subject: [PATCH 01/41] Update README.md IES XML node respec --- 22.RaytracedAO/README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/22.RaytracedAO/README.md b/22.RaytracedAO/README.md index 374c16e2e..18d1c0e74 100644 --- a/22.RaytracedAO/README.md +++ b/22.RaytracedAO/README.md @@ -165,13 +165,14 @@ So the full width, height are 1152x1152 (64+1024+64=1152) | Property Name | Description | Type | Default Value | |---------------|------------------------------------------------------------------------------------------------------------------|-------|---------------| -| normalizeEnergy | Parameter to normalize the intensity of emission profile.
1) If `normalizeEnergy` is zero, it will not perform any normalization. (no normalization)
2) If `normalizeEnergy` is negative, it will normalize the intensity by dividing out the maximum intensity. (normalization by max)
3) If `normalizeEnergy` is positive, it will first normalize the intensity by average energy and multiply `normalizeEnergy` to intensity. (normalization by energy) | float | 0.0 (no normalization) | -| filename | The filename of the IES profile. | string | "" | +| normalization | Parameter to normalize the intensity of emission profile.
1) If `normalization` is `NONE` or invalid/none of the below, it will not perform any normalization.
2) If `normalization` is `UNIT_MAX`, it will normalize the intensity by dividing out the maximum intensity. (normalization by max)
3) If `normalization` is `UNIT_AVERAGE_OVER_IMPLIED_DOMAIN`, it will integrate the profile over the hemisphere as well as the solid angles where the profile has emission above 0. This has an advantage over a plain average as you don't need to care whether the light is a sphere, hemisphere, or a spotlight of a given aperture. (normalization by energy)
4) If `normalization` is `UNIT_AVERAGE_OVER_FULL_DOMAIN`, we behave like `UNIT_AVERAGE` but presume the solid angle of the domain is `(CIESProfile::vAngles.front()-CIESProfile::vAngles.back())*4.f` | string | ""
(no normalization) | +| flatten | Optional "blend" of the original profile value with the average value. If negative, we use the average as if for `UNIT_AVERAGE_OVER_FULL_DOMAIN`; if positive, we use the average as if for `UNIT_AVERAGE_OVER_IMPLIED_DOMAIN`.
This is useful when the emitter appears "not bright enough" when observing from directions outside the main power-lobes.
Valid range is 0.0 to 1.0; the value gets treated with `min(abs(flatten),1.f)` to make it conform.
A value equal to 1.0 or -1.0 will render your IES profile uniform, so its not something you should use and a warning will be emitted. | float | 0.0 | +| filename | The filename of the IES profile. | string | "" | NOTE: **\** tag of emitter node can be used to orient the emission direction of IES light. -#### Example of Area Light with IES Profile +#### Example of Area Light with IES Profile which flattens its profile against a full Sphere or Hemisphere average ```xml @@ -180,7 +181,8 @@ NOTE: **\** tag of emitter node can be used to orient the emission d - + + From c89603d85c347f2f14e43a2d84f69d19c13d5c21 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Sat, 13 Apr 2024 11:32:13 +0200 Subject: [PATCH 02/41] update media submodule --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index 6f5346ff8..d67e199a9 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 6f5346ff8f20f0bedeaa9c58a715ab4d6fce661c +Subproject commit d67e199a9d834ba94d86bd3a7c7280cc0836809e From 3fd39cbe836d9f048cefb4eae47645a40301c139 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 17 Apr 2024 10:37:49 +0200 Subject: [PATCH 03/41] update media submodule --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index d67e199a9..4aeef87fb 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit d67e199a9d834ba94d86bd3a7c7280cc0836809e +Subproject commit 4aeef87fb154e5de207a6e7e24d6cf9842fb071a From 84fd4b2c4d7c79d07609a9fa4ba3ebc2c8327330 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Tue, 23 Apr 2024 16:46:44 +0200 Subject: [PATCH 04/41] make IES example more test friendly, add inputs.json with IES references (supports inputs as directories & files, have writeAssets & GUI mode), reference new IES files, adjust main.cpp to new changes, update media submodule, remove old test.ies --- 50.IESProfileTest/CMakeLists.txt | 5 +- 50.IESProfileTest/inputs.json | 14 +++ 50.IESProfileTest/main.cpp | 195 
+++++++++++++++++++++++++++---- 50.IESProfileTest/test.ies | 30 ----- media | 2 +- 5 files changed, 189 insertions(+), 57 deletions(-) create mode 100644 50.IESProfileTest/inputs.json delete mode 100644 50.IESProfileTest/test.ies diff --git a/50.IESProfileTest/CMakeLists.txt b/50.IESProfileTest/CMakeLists.txt index 71c002ad5..52e8e83f2 100644 --- a/50.IESProfileTest/CMakeLists.txt +++ b/50.IESProfileTest/CMakeLists.txt @@ -4,4 +4,7 @@ if(NOT RES) message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") endif() -nbl_create_executable_project("" "" "" "") \ No newline at end of file +nbl_create_executable_project("" "" "" nlohmann_json::nlohmann_json) + +add_dependencies(${EXECUTABLE_NAME} nlohmann_json::nlohmann_json) +target_include_directories(${EXECUTABLE_NAME} PUBLIC $) \ No newline at end of file diff --git a/50.IESProfileTest/inputs.json b/50.IESProfileTest/inputs.json new file mode 100644 index 000000000..d6b4ce528 --- /dev/null +++ b/50.IESProfileTest/inputs.json @@ -0,0 +1,14 @@ +{ + "directories": [ + "../media/mitsuba/ies/packages/leomoon-dot-com_ies-lights-pack/ies-lights-pack" + ], + "files": [ + "../media/mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies", + "../media/mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies", + "../media/mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies", + "../media/mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies", + "../media/mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies" + ], + "gui": true, + "writeAssets": false +} \ No newline at end of file diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 79106697b..489a79d56 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -2,14 +2,37 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h +#define BENCHMARK_TILL_FIRST_FRAME + #include #include #include #include "nbl/ext/ScreenShot/ScreenShot.h" #include "compute/common.h" +#include + +// small hack to compile with the json library +namespace std +{ + int sprintf_s(char* buffer, size_t size, const char* format, ...) { + va_list args; + va_start(args, format); + int result = ::sprintf_s(buffer, size, format, args); + va_end(args); + return result; + } +} + +#include "nlohmann/json.hpp" using namespace nbl; using namespace core; +using json = nlohmann::json; + +#ifdef BENCHMARK_TILL_FIRST_FRAME +const std::chrono::steady_clock::time_point startBenchmark = std::chrono::high_resolution_clock::now(); +bool stopBenchamrkFlag = false; +#endif class IESCompute { @@ -91,6 +114,16 @@ class IESCompute { driver->blitRenderTargets(fbo, nullptr, false, false); driver->endScene(); + + #ifdef BENCHMARK_TILL_FIRST_FRAME + if (!stopBenchamrkFlag) + { + const std::chrono::steady_clock::time_point stopBenchmark = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(stopBenchmark - startBenchmark); + std::cout << "Time taken till first render pass: " << duration.count() << " milliseconds" << std::endl; + stopBenchamrkFlag = true; + } + #endif } void updateZDegree(const asset::CIESProfile::IES_STORAGE_FORMAT& degreeOffset) @@ -544,38 +577,149 @@ int main() asset::IAssetLoader::SAssetLoadParams lparams; lparams.loaderFlags; - - constexpr auto IES_INPUTS = std::array - { - std::string_view("../../media/mitsuba/ies/ISOTROPIC/007cfb11e343e2f42e3b476be4ab684e.ies"), - std::string_view("../../media/mitsuba/ies/ANIISOTROPIC/QUAD_SYMMETRY/0275171fb664c1b3f024d1e442a68d22.ies"), - std::string_view("../../media/mitsuba/ies/ANIISOTROPIC/HALF_SYMMETRY/1392a1ba55b67d3e0ae7fd63527f3e78.ies"), - std::string_view("../../media/mitsuba/ies/ANIISOTROPIC/OTHER_HALF_SYMMETRY/028e97564391140b1476695ae7a46fa4.ies"), - 
std::string_view("../../media/mitsuba/ies/NO_LATERAL_SYMMET/4b88bf886b39cfa63094e70e1afa680e.ies"), + + auto readJSON = [](const std::string& filePath) + { + std::ifstream file(filePath.data()); + if (!file.is_open()) { + printf("Invalid input json \"%s\" file! Aborting..", filePath.data()); + exit(0x45); + } + + std::stringstream buffer; + buffer << file.rdbuf(); + + return buffer.str(); }; + const auto INPUT_JSON_FILE_PATH_FS = std::filesystem::absolute("../inputs.json"); + const auto INPUT_JSON_FILE_PATH = INPUT_JSON_FILE_PATH_FS.string(); + const auto jsonBuffer = readJSON(INPUT_JSON_FILE_PATH); + if (jsonBuffer.empty()) { + printf("Read input json \"%s\" file is empty! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); + exit(0x45); + } + + const auto jsonMap = json::parse(jsonBuffer.c_str()); + + if (!jsonMap["directories"].is_array()) + { + printf("Input json \"%s\" file's field \"directories\" is not an array! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); + exit(0x45); + } + + if (!jsonMap["files"].is_array()) + { + printf("Input json \"%s\" file's field \"files\" is not an array! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); + exit(0x45); + } + + if (!jsonMap["writeAssets"].is_boolean()) + { + printf("Input json \"%s\" file's field \"writeAssets\" is not a boolean! Aborting..\n", INPUT_JSON_FILE_PATH.c_str()); + exit(0x45); + } + + const auto&& IES_INPUTS = [&]() + { + std::vector inputFilePaths; + + auto addFile = [&inputFilePaths, &INPUT_JSON_FILE_PATH_FS](const std::string_view filePath) -> void + { + auto path = std::filesystem::path(filePath); + + if (!path.is_absolute()) + path = std::filesystem::absolute(INPUT_JSON_FILE_PATH_FS.parent_path() / path); + + if (std::filesystem::exists(path) && std::filesystem::is_regular_file(path) && path.extension() == ".ies") + inputFilePaths.push_back(path.string()); + else + { + printf("Invalid input path \"%s\"! 
Aborting..\n", path.string().c_str()); + exit(0x45); + } + }; + + auto addFiles = [&inputFilePaths, &INPUT_JSON_FILE_PATH_FS, &addFile](const std::string_view directoryPath) -> void + { + auto directory(std::filesystem::absolute(INPUT_JSON_FILE_PATH_FS.parent_path() / directoryPath)); + if (!std::filesystem::exists(directory) || !std::filesystem::is_directory(directory)) { + printf("Invalid input directory \"%s\"! Aborting..\n", directoryPath.data()); + exit(0x45); + } + + for (const auto& entry : std::filesystem::directory_iterator(directory)) + addFile(entry.path().string().c_str()); + }; + + // parse json + { + std::vector jDirectories; + jsonMap["directories"].get_to(jDirectories); + + for (const auto& it : jDirectories) + addFiles(it); + + std::vector jFiles; + jsonMap["files"].get_to(jFiles); + + for (const auto& it : jFiles) + addFile(it); + } + + return std::move(inputFilePaths); + }(); + + const bool GUI = [&]() + { + bool b = false; + jsonMap["gui"].get_to(b); + + return b; + }(); + + const bool WRITE_ASSETS = [&]() + { + bool b = false; + jsonMap["writeAssets"].get_to(b); + + return b; + }(); + const auto ASSETS = [&]() { + size_t loaded = {}, total = IES_INPUTS.size(); std::vector assets; std::vector outStems; - for (size_t i = 0; i < IES_INPUTS.size(); ++i) + for (size_t i = 0; i < total; ++i) { - auto asset = device->getAssetManager()->getAsset(IES_INPUTS[i].data(), lparams); - const auto stem = std::filesystem::path(IES_INPUTS[i].data()).stem().string(); + auto asset = device->getAssetManager()->getAsset(IES_INPUTS[i].c_str(), lparams); + const auto* path = IES_INPUTS[i].c_str(); + const auto stem = std::filesystem::path(IES_INPUTS[i].c_str()).stem().string(); if (asset.getMetadata()) { assets.emplace_back(std::move(asset)); outStems.push_back(stem); + ++loaded; } else - printf("Could not load metadata from \"%s\" asset! Skipping..", stem.c_str()); + printf("Could not load metadata from \"%s\" asset! 
Skipping..\n", path); } + printf("Loaded [%s/%s] assets! Status: %s\n", std::to_string(loaded).c_str(), std::to_string(total).c_str(), loaded == total ? "PASSING" : "FAILING"); return std::make_pair(assets, outStems); }(); + if (GUI) + printf("GUI Mode: ON\n"); + else + { + printf("GUI Mode: OFF\nExiting..."); + exit(0); + } + IESCompute iesComputeEnvironment(driver, am, ASSETS.first); IESExampleEventReceiver receiver; device->setEventReceiver(&receiver); @@ -640,22 +784,23 @@ int main() receiver.reset(); } - for (size_t i = 0; i < ASSETS.first.size(); ++i) - { - const auto& bundle = ASSETS.first[i]; - const auto& stem = ASSETS.second[i]; + if(WRITE_ASSETS) + for (size_t i = 0; i < ASSETS.first.size(); ++i) + { + const auto& bundle = ASSETS.first[i]; + const auto& stem = ASSETS.second[i]; - const auto& profile = bundle.getMetadata()->selfCast()->profile; - // const std::string out = std::filesystem::absolute("out/cpu/" + std::string(getProfileRS(profile)) + "/" + stem + ".png").string(); TODO (?): why its not working? - const std::string out = std::filesystem::absolute(std::string(getProfileRS(profile)) + "_" + stem + ".png").string(); + const auto& profile = bundle.getMetadata()->selfCast()->profile; + // const std::string out = std::filesystem::absolute("out/cpu/" + std::string(getProfileRS(profile)) + "/" + stem + ".png").string(); TODO (?): why its not working? 
ah touch required probably first + const std::string out = std::filesystem::absolute(std::string(getProfileRS(profile)) + "_" + stem + ".png").string(); - asset::IAssetWriter::SAssetWriteParams wparams(bundle.getContents().begin()->get()); + asset::IAssetWriter::SAssetWriteParams wparams(bundle.getContents().begin()->get()); - if (am->writeAsset(out.c_str(), wparams)) - printf("Saved \"%s\"\n", out.c_str()); - else - printf("Could not write \"%s\"\n", out.c_str()); - } + if (am->writeAsset(out.c_str(), wparams)) + printf("Saved \"%s\"\n", out.c_str()); + else + printf("Could not write \"%s\"\n", out.c_str()); + } return 0; } \ No newline at end of file diff --git a/50.IESProfileTest/test.ies b/50.IESProfileTest/test.ies deleted file mode 100644 index 8e00804c3..000000000 --- a/50.IESProfileTest/test.ies +++ /dev/null @@ -1,30 +0,0 @@ -IESNA:LM-63-1995 -[TEST] -[TESTLAB] BEGA -[MANUFAC] BEGA -[MORE] Copyright LUMCat V -[LUMCAT] -[LUMINAIRE] 84483K3 (Preliminary) -[ISSUEDATE] 2020-07-22 -[LAMPCAT] LED 24W -[LAMP] 2500 lm,27 W -TILT=NONE -1 -1 1.0 73 1 1 2 -0.485 0.000 0.130 -1.0 1.0 27 - 0.0 2.5 5.0 7.5 10.0 12.5 15.0 17.5 20.0 22.5 25.0 27.5 30.0 - 32.5 35.0 37.5 40.0 42.5 45.0 47.5 50.0 52.5 55.0 57.5 60.0 62.5 - 65.0 67.5 70.0 72.5 75.0 77.5 80.0 82.5 85.0 87.5 90.0 92.5 95.0 - 97.5 100.0 102.5 105.0 107.5 110.0 112.5 115.0 117.5 120.0 122.5 125.0 127.5 - 130.0 132.5 135.0 137.5 140.0 142.5 145.0 147.5 150.0 152.5 155.0 157.5 160.0 - 162.5 165.0 167.5 170.0 172.5 175.0 177.5 180.0 - 0.0 - 688.3 686.8 684.0 680.3 675.3 668.8 660.9 650.7 - 638.6 624.9 609.6 593.0 575.2 556.3 536.5 516.3 - 495.7 475.4 455.7 436.5 417.0 397.4 378.0 359.1 - 340.8 322.9 305.3 287.9 270.9 253.9 237.1 220.5 - 204.1 187.8 171.8 156.0 140.5 125.4 110.7 96.4 - 82.6 69.3 56.5 44.6 33.6 23.7 15.3 8.7 - 4.1 1.4 0.3 0.1 0.1 0.0 0.0 0.0 - 0.0 0.0 0.0 0.0 0.1 0.2 0.4 0.6 - 1.0 1.3 1.4 1.4 1.4 1.3 0.8 0.5 - 0.4 diff --git a/media b/media index 4aeef87fb..6a9bdc080 160000 --- a/media +++ b/media @@ 
-1 +1 @@ -Subproject commit 4aeef87fb154e5de207a6e7e24d6cf9842fb071a +Subproject commit 6a9bdc0806ca640dcf563cd613a7cc77e3b75529 From 1762c619f2af189c7749e924cfb4ca74170e201b Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Wed, 24 Apr 2024 20:00:52 +0200 Subject: [PATCH 05/41] update media submodule --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index 6a9bdc080..4c2ef1ff1 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 6a9bdc0806ca640dcf563cd613a7cc77e3b75529 +Subproject commit 4c2ef1ff1018f832c0daa285f25b1f08c0289ba7 From fb91e36e0a0d34eb9f92135e7587deb875c05f46 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Thu, 25 Apr 2024 11:18:39 +0200 Subject: [PATCH 06/41] update media submodule --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index 4c2ef1ff1..2b8e2a506 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 4c2ef1ff1018f832c0daa285f25b1f08c0289ba7 +Subproject commit 2b8e2a50640597faaf9c6cfaaddeff321b8e41fa From 2981984906798fc159bf4e0c29450ce4ddfdf937 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 26 Apr 2024 13:04:27 +0200 Subject: [PATCH 07/41] adjust the example to new changes, flush descriptor sets on profile change (now for graphics pipeline too), remove hardcoded textures' dimensions and use optimal resolution for IES data --- 50.IESProfileTest/compute/cdc.comp | 8 +- 50.IESProfileTest/compute/common.h | 1 - 50.IESProfileTest/main.cpp | 165 ++++++++++++++++------------- 3 files changed, 95 insertions(+), 79 deletions(-) diff --git a/50.IESProfileTest/compute/cdc.comp b/50.IESProfileTest/compute/cdc.comp index 037d01a36..390d63acb 100644 --- a/50.IESProfileTest/compute/cdc.comp +++ b/50.IESProfileTest/compute/cdc.comp @@ -224,11 +224,11 @@ bool isWithinSCDomain(vec2 point) void main() { - const float VERTICAL_INVERSE = 1.0f / TEXTURE_SIZE; - const float HORIZONTAL_INVERSE = 1.0f / TEXTURE_SIZE; - - const ivec2 pixelCoordinates = 
ivec2(gl_GlobalInvocationID.xy); const ivec2 destinationSize = imageSize(outIESCandelaImage); + const ivec2 pixelCoordinates = ivec2(gl_GlobalInvocationID.xy); + + const float VERTICAL_INVERSE = 1.0f / float(destinationSize.x); + const float HORIZONTAL_INVERSE = 1.0f / float(destinationSize.y); if (all(lessThan(pixelCoordinates, destinationSize))) { diff --git a/50.IESProfileTest/compute/common.h b/50.IESProfileTest/compute/common.h index 378625732..edbc94104 100644 --- a/50.IESProfileTest/compute/common.h +++ b/50.IESProfileTest/compute/common.h @@ -8,7 +8,6 @@ #define M_HALF_PI M_PI/2.0f // would be cool if we have this define somewhere or GLSL do #define QUANT_ERROR_ADMISSIBLE 1/1024 -#define TEXTURE_SIZE 1024u #define WORKGROUP_SIZE 256u #define WORKGROUP_DIMENSION 16u diff --git a/50.IESProfileTest/main.cpp b/50.IESProfileTest/main.cpp index 489a79d56..7aa640f67 100644 --- a/50.IESProfileTest/main.cpp +++ b/50.IESProfileTest/main.cpp @@ -94,7 +94,7 @@ class IESCompute driver->bindDescriptorSets(EPBP_COMPUTE, gpue.cPipeline->getLayout(), 0u, 1u, &gpue.cDescriptorSet.get(), nullptr); driver->pushConstants(gpue.cPipeline->getLayout(), asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant), &pushConstant); - _NBL_STATIC_INLINE_CONSTEXPR auto xGroups = (TEXTURE_SIZE - 1u) / WORKGROUP_DIMENSION + 1u; + const auto xGroups = (getActiveProfile().getOptimalIESResolution().x - 1u) / WORKGROUP_DIMENSION + 1u; driver->dispatch(xGroups, xGroups, 1u); COpenGLExtensionHandler::extGlMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); @@ -142,8 +142,11 @@ class IESCompute generalPurposeOffset = newOffset; // not elegant way to do it here but lets leave it as it is - updateCDescriptorSets(); - pushConstant.maxIValueReciprocal = (float)getActiveProfile().getMaxCandelaValue(); + updateCDescriptorSets(); // flush descriptor set + updateGDescriptorSets(); // flush descriptor set + + const auto& profile = getActiveProfile(); + 
pushConstant.maxIValue = (float)profile.getMaxCandelaValue(); } } @@ -176,18 +179,12 @@ class IESCompute }; auto& gpue = m_gpue; - - gpue.dImageIESC = std::move(createGPUImageView(TEXTURE_SIZE, TEXTURE_SIZE)); - gpue.dImageS = std::move(createGPUImageView(TEXTURE_SIZE, TEXTURE_SIZE)); - gpue.dImageD = std::move(createGPUImageView(TEXTURE_SIZE, TEXTURE_SIZE)); - gpue.dImageTMask = std::move(createGPUImageView(TEXTURE_SIZE, TEXTURE_SIZE)); - - createSSBOBuffers(); + createGPUDescriptors(); + const auto initIdx = generalPurposeOffset; // Compute { const std::vector bindings = getCBindings(); - { auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size()); asset::SPushConstantRange range = { asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant) }; @@ -197,22 +194,6 @@ class IESCompute } { - { - { - gpue.cinfos[EB_IMAGE_IES_C].desc = core::smart_refctd_ptr(gpue.dImageIESC); - gpue.cinfos[EB_IMAGE_IES_C].image = { nullptr, asset::EIL_GENERAL }; - - gpue.cinfos[EB_IMAGE_S].desc = core::smart_refctd_ptr(gpue.dImageS); - gpue.cinfos[EB_IMAGE_S].image = { nullptr, asset::EIL_GENERAL }; - - gpue.cinfos[EB_IMAGE_D].desc = core::smart_refctd_ptr(gpue.dImageD); - gpue.cinfos[EB_IMAGE_D].image = { nullptr, asset::EIL_GENERAL }; - - gpue.cinfos[EB_IMAGE_T_MASK].desc = core::smart_refctd_ptr(gpue.dImageTMask); - gpue.cinfos[EB_IMAGE_T_MASK].image = { nullptr, asset::EIL_GENERAL }; - } - } - for (auto i = 0; i < EB_SIZE; i++) { gpue.cwrites[i].dstSet = gpue.cDescriptorSet.get(); @@ -236,14 +217,7 @@ class IESCompute // Graphics { - const std::vector bindings = - { - { EB_IMAGE_IES_C, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_S, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_D, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, - { EB_IMAGE_T_MASK, 
asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr } - }; - + const std::vector bindings = getGBindings(); { auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size()); @@ -271,39 +245,23 @@ class IESCompute return driver->createGPUSampler({ asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETC_CLAMP_TO_EDGE,asset::ISampler::ETBC_FLOAT_OPAQUE_BLACK,asset::ISampler::ETF_LINEAR,asset::ISampler::ETF_LINEAR,asset::ISampler::ESMM_LINEAR,0u,false,asset::ECO_ALWAYS }); }; - _NBL_STATIC_INLINE_CONSTEXPR uint8_t NBL_D_IMAGES_AMOUNT = 4u; + gpue.sampler = createSampler(); - IGPUDescriptorSet::SDescriptorInfo infos[NBL_D_IMAGES_AMOUNT]; + for (auto i = 0; i < gpue.NBL_D_IMAGES_AMOUNT; i++) { - infos[EB_IMAGE_IES_C].desc = core::smart_refctd_ptr(gpue.dImageIESC); - infos[EB_IMAGE_IES_C].image = { createSampler(),asset::EIL_SHADER_READ_ONLY_OPTIMAL}; - - infos[EB_IMAGE_S].desc = core::smart_refctd_ptr(gpue.dImageS); - infos[EB_IMAGE_S].image = { createSampler(),asset::EIL_SHADER_READ_ONLY_OPTIMAL }; - - infos[EB_IMAGE_D].desc = core::smart_refctd_ptr(gpue.dImageD); - infos[EB_IMAGE_D].image = { createSampler(),asset::EIL_SHADER_READ_ONLY_OPTIMAL }; - - infos[EB_IMAGE_T_MASK].desc = core::smart_refctd_ptr(gpue.dImageTMask); - infos[EB_IMAGE_T_MASK].image = { createSampler(),asset::EIL_SHADER_READ_ONLY_OPTIMAL }; + gpue.gwrites[i].dstSet = gpue.gDescriptorSet.get(); + gpue.gwrites[i].binding = i; + gpue.gwrites[i].count = 1u; + gpue.gwrites[i].arrayElement = 0u; + gpue.gwrites[i].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER; + gpue.gwrites[i].info = gpue.ginfos + i; } - video::IGPUDescriptorSet::SWriteDescriptorSet writes[NBL_D_IMAGES_AMOUNT]; - for (auto i = 0; i < NBL_D_IMAGES_AMOUNT; i++) - { - writes[i].dstSet = gpue.gDescriptorSet.get(); - writes[i].binding = i; - writes[i].count = 1u; - writes[i].arrayElement = 0u; - 
writes[i].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER; - writes[i].info = &infos[i]; - } - - driver->updateDescriptorSets(NBL_D_IMAGES_AMOUNT, writes, 0u, nullptr); + updateGDescriptorSets(); } } - void createSSBOBuffers() + void createGPUDescriptors() { auto createCPUBuffer = [&](const auto& pInput) { @@ -326,22 +284,27 @@ class IESCompute cssbod.hAngles = createGPUBuffer(createCPUBuffer(profile.getHoriAngles())); cssbod.vAngles = createGPUBuffer(createCPUBuffer(profile.getVertAngles())); cssbod.data = createGPUBuffer(createCPUBuffer(profile.getData())); + + const auto optimalResolution = profile.getOptimalIESResolution(); + + cssbod.dImageIESC = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); + cssbod.dImageS = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); + cssbod.dImageD = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); + cssbod.dImageTMask = std::move(createGPUImageView(optimalResolution.x, optimalResolution.y)); } } void updateCDescriptorSets() { + fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_IES_C]); + fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_S]); + fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_D]); + fillImageDescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_IMAGE_T_MASK]); + fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_HA]); fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_VA]); fillSSBODescriptorInfo(generalPurposeOffset, m_gpue.cinfos[EB_SSBO_D]); - const std::vector bindings = getCBindings(); - { - auto descriptorSetLayout = driver->createGPUDescriptorSetLayout(bindings.data(), bindings.data() + bindings.size()); - asset::SPushConstantRange range = { asset::ISpecializedShader::ESS_COMPUTE, 0u, sizeof(PushConstant) }; - m_gpue.cDescriptorSet = driver->createGPUDescriptorSet(std::move(descriptorSetLayout)); // I guess it can be done better - } - const 
core::smart_refctd_ptr proxy(m_gpue.cPipeline->getLayout()->getDescriptorSetLayout(0)); m_gpue.cDescriptorSet = core::smart_refctd_ptr(driver->createGPUDescriptorSet(core::smart_refctd_ptr(proxy))); @@ -351,6 +314,22 @@ class IESCompute driver->updateDescriptorSets(EB_SIZE, m_gpue.cwrites, 0u, nullptr); } + void updateGDescriptorSets() + { + fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_IES_C]); + fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_S]); + fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_D]); + fillImageDescriptorInfo(generalPurposeOffset, m_gpue.ginfos[EB_IMAGE_T_MASK]); + + const core::smart_refctd_ptr proxy(m_gpue.gPipeline->getLayout()->getDescriptorSetLayout(3)); + m_gpue.gDescriptorSet = core::smart_refctd_ptr(driver->createGPUDescriptorSet(core::smart_refctd_ptr(proxy))); + + for (auto i = 0; i < m_gpue.NBL_D_IMAGES_AMOUNT; i++) + m_gpue.gwrites[i].dstSet = m_gpue.gDescriptorSet.get(); + + driver->updateDescriptorSets(m_gpue.NBL_D_IMAGES_AMOUNT, m_gpue.gwrites, 0u, nullptr); + } + template void fillSSBODescriptorInfo(const size_t assetIndex, IGPUDescriptorSet::SDescriptorInfo& info) { @@ -372,6 +351,29 @@ class IESCompute info.buffer = { 0, proxy->getSize() }; } + template + void fillImageDescriptorInfo(const size_t assetIndex, IGPUDescriptorSet::SDescriptorInfo& info) + { + static_assert(binding == EB_IMAGE_IES_C || binding == EB_IMAGE_S || binding == EB_IMAGE_D || binding == EB_IMAGE_T_MASK); + + const auto& profile = getProfile(assetIndex); + auto& cssbod = m_gpue.CSSBOD[assetIndex]; + + core::smart_refctd_ptr proxy; + + if constexpr (binding == EB_IMAGE_IES_C) + proxy = core::smart_refctd_ptr(cssbod.dImageIESC); + else if (binding == EB_IMAGE_S) + proxy = core::smart_refctd_ptr(cssbod.dImageS); + else if (binding == EB_IMAGE_D) + proxy = core::smart_refctd_ptr(cssbod.dImageD); + else + proxy = core::smart_refctd_ptr(cssbod.dImageTMask); + + info.desc = 
core::smart_refctd_ptr(proxy); + info.image = { core::smart_refctd_ptr(m_gpue.sampler), asset::EIL_SHADER_READ_ONLY_OPTIMAL }; + } + template auto createGPUImageView(const size_t& width, const size_t& height) { @@ -418,6 +420,19 @@ class IESCompute return bindings; } + std::vector getGBindings() + { + const std::vector bindings = + { + { EB_IMAGE_IES_C, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, + { EB_IMAGE_S, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, + { EB_IMAGE_D, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr }, + { EB_IMAGE_T_MASK, asset::EDT_COMBINED_IMAGE_SAMPLER, 1, asset::ISpecializedShader::ESS_FRAGMENT, nullptr } + }; + + return bindings; + } + template video::IFrameBuffer* createFBO(const size_t& width, const size_t& height) { @@ -434,6 +449,8 @@ class IESCompute struct GPUE { + _NBL_STATIC_INLINE_CONSTEXPR uint8_t NBL_D_IMAGES_AMOUNT = 4u; + // Compute core::smart_refctd_ptr cPipeline; core::smart_refctd_ptr cDescriptorSet; @@ -444,6 +461,7 @@ class IESCompute struct CSSBODescriptor { core::smart_refctd_ptr vAngles, hAngles, data; + core::smart_refctd_ptr dImageIESC, dImageS, dImageD, dImageTMask; }; std::vector CSSBOD; @@ -453,20 +471,19 @@ class IESCompute core::smart_refctd_ptr gDescriptorSet; core::smart_refctd_ptr mBuffer; + IGPUDescriptorSet::SDescriptorInfo ginfos[NBL_D_IMAGES_AMOUNT]; + IGPUDescriptorSet::SWriteDescriptorSet gwrites[NBL_D_IMAGES_AMOUNT]; + // Shared data - core::smart_refctd_ptr dImageIESC; - core::smart_refctd_ptr dImageS; - core::smart_refctd_ptr dImageD; - core::smart_refctd_ptr dImageTMask; + core::smart_refctd_ptr sampler; } m_gpue; #include "nbl/nblpack.h" struct PushConstant { - float maxIValueReciprocal; + float maxIValue; float zAngleDegreeRotation; IESCompute::E_MODE mode = IESCompute::EM_CDC; - uint32_t dummy; } PACK_STRUCT; #include "nbl/nblunpack.h" From 
235b4ff98901ff43a1c09db24a55f91b8ee5ca4f Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 17 May 2024 11:32:11 +0200 Subject: [PATCH 08/41] temporary changes --- 22.RaytracedAO/Renderer.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 1e4b15c8b..b41019de5 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -347,20 +347,22 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh assert(meshBuffer->getInstanceCount()==instanceCount); // We'll disable certain attributes to ensure we only copy position, normal and uv attribute SVertexInputParams& vertexInput = meshBuffer->getPipeline()->getVertexInputParams(); - // but we'll pack normals and UVs together to save one SSBO binding (and quantize UVs to half floats) + // but we'll pack normals and UVs together to save one SSBO binding, but no quantization of UVs to keep accurate floating point precision for baricentrics constexpr auto freeBinding = 15u; vertexInput.attributes[combinedNormalUVAttributeIx].binding = freeBinding; - vertexInput.attributes[combinedNormalUVAttributeIx].format = EF_R32G32_UINT; + vertexInput.attributes[combinedNormalUVAttributeIx].format = EF_R32G32B32_UINT; vertexInput.attributes[combinedNormalUVAttributeIx].relativeOffset = 0u; vertexInput.enabledBindingFlags |= 0x1u<getBaseVertex(); + struct CombinedNormalUV { - uint32_t nml; - uint16_t u,v; + uint32_t normal, u, v; }; + static_assert(sizeof(CombinedNormalUV) == sizeof(float) * 3u); + auto newBuff = core::make_smart_refctd_ptr(sizeof(CombinedNormalUV)*approxVxCount); auto* dst = reinterpret_cast(newBuff->getPointer())+meshBuffer->getBaseVertex(); meshBuffer->setVertexBufferBinding({0u,newBuff},freeBinding); @@ -369,11 +371,11 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh vertexInput.attributes[normalAttr].format = EF_R32_UINT; for (auto i=0u; 
igetAttribute(&dst[i].nml,normalAttr,i); + meshBuffer->getAttribute(&dst[i].normal,normalAttr,i); core::vectorSIMDf uv; meshBuffer->getAttribute(uv,2u,i); - dst[i].u = core::Float16Compressor::compress(uv.x); - dst[i].v = core::Float16Compressor::compress(uv.y); + dst[i].u = uv.x; + dst[i].v = uv.y; } } From 0a6c7eeb3b00884c3bb0ac12fade532ce30de0af Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 17 May 2024 12:18:46 +0200 Subject: [PATCH 09/41] debug scene commit, find our why 0 MDI I get and allocate more space for mesh packer to handle new UV precision, now I have runtime bugs which are probably due to UVs fetching in shaders --- 22.RaytracedAO/Renderer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index b41019de5..fdd2614e2 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -318,7 +318,7 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh IMeshPackerV2Base::SupportedFormatsContainer formats; formats.insert(EF_R32G32B32_SFLOAT); - formats.insert(EF_R32G32_UINT); + formats.insert(EF_R32G32B32_UINT); auto cpump = core::make_smart_refctd_ptr>(allocParams,formats,minTrisBatch,maxTrisBatch); uint32_t mdiBoundMax=0u,batchInstanceBoundTotal=0u; core::vector allocData; From 6ec449ce0661d7a8931c623b36a90c2f935361b8 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 17 May 2024 14:12:53 +0200 Subject: [PATCH 10/41] update virtualGeometry.glsl to fetch combined UV+Normal attribute with nbl_glsl_VG_attribFetch3u, perform tests --- 22.RaytracedAO/virtualGeometry.glsl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/22.RaytracedAO/virtualGeometry.glsl b/22.RaytracedAO/virtualGeometry.glsl index b500124aa..b69a553a3 100644 --- a/22.RaytracedAO/virtualGeometry.glsl +++ b/22.RaytracedAO/virtualGeometry.glsl @@ -27,15 +27,16 @@ vec3 nbl_glsl_fetchVtxPos(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance vec3 
nbl_glsl_fetchVtxNormal(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) { nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit; - const uint codedNormal = nbl_glsl_VG_attribFetch2u(va,vtxID)[0]; + const uint codedNormal = nbl_glsl_VG_attribFetch3u(va,vtxID)[0]; return normalize(nbl_glsl_decodeRGB10A2_SNORM(codedNormal).xyz); } vec2 nbl_glsl_fetchVtxUV(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) { nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit; - const uint codedUV = nbl_glsl_VG_attribFetch2u(va,vtxID)[1]; - return unpackHalf2x16(codedUV).xy; + const vec2 codedUV = nbl_glsl_VG_attribFetch3u(va,vtxID).yz; + + return codedUV; } From 9472e56345b726a4c18a06734e299f3bb51c518b Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Fri, 17 May 2024 14:23:53 +0200 Subject: [PATCH 11/41] fix a type typo bug in CombinedNormalUV - use floats for UV. Also use uintBitsToFloat in virtualGeometry.glsl to unpack UVs from unsigned type vector --- 22.RaytracedAO/Renderer.cpp | 3 ++- 22.RaytracedAO/virtualGeometry.glsl | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index fdd2614e2..6b0e45254 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -359,7 +359,8 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh struct CombinedNormalUV { - uint32_t normal, u, v; + uint32_t normal; + float u, v; }; static_assert(sizeof(CombinedNormalUV) == sizeof(float) * 3u); diff --git a/22.RaytracedAO/virtualGeometry.glsl b/22.RaytracedAO/virtualGeometry.glsl index b69a553a3..422c939f9 100644 --- a/22.RaytracedAO/virtualGeometry.glsl +++ b/22.RaytracedAO/virtualGeometry.glsl @@ -34,9 +34,8 @@ vec3 nbl_glsl_fetchVtxNormal(in uint vtxID, in nbl_glsl_ext_Mitsuba_Loader_insta vec2 nbl_glsl_fetchVtxUV(in uint vtxID, in 
nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchInstanceData) { nbl_glsl_VG_VirtualAttributePacked_t va = batchInstanceData.determinantSignBit; - const vec2 codedUV = nbl_glsl_VG_attribFetch3u(va,vtxID).yz; - - return codedUV; + const uvec2 codedUV = nbl_glsl_VG_attribFetch3u(va,vtxID).yz; + return vec2(uintBitsToFloat(codedUV.x), uintBitsToFloat(codedUV.y)); } From bc0bb127b9218c64bfd9fd3400123a03145c3d7d Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 4 Jun 2024 20:36:13 +0200 Subject: [PATCH 12/41] Improve fragmentation of mesh data, make more scenes fit in 2GB-1 byte of SSBO --- 22.RaytracedAO/Renderer.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 6b0e45254..813e85605 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -293,7 +293,7 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh // one instance data per instance of a batch core::smart_refctd_ptr newInstanceDataBuffer; - constexpr uint16_t minTrisBatch = MAX_TRIANGLES_IN_BATCH>>1u; + constexpr uint16_t minTrisBatch = MAX_TRIANGLES_IN_BATCH>>3u; // allow small allocations to fight fragmentation constexpr uint16_t maxTrisBatch = MAX_TRIANGLES_IN_BATCH; constexpr uint8_t minVertexSize = asset::getTexelOrBlockBytesize()+ @@ -304,8 +304,8 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh constexpr uint16_t minIndicesBatch = minTrisBatch*kIndicesPerTriangle; CPUMeshPacker::AllocationParams allocParams; - allocParams.vertexBuffSupportedByteSize = 1u<<31u; - allocParams.vertexBufferMinAllocByteSize = minTrisBatch*minVertexSize; + allocParams.vertexBuffSupportedByteSize = (1u<<31u)-1; // RTX cards + allocParams.vertexBufferMinAllocByteSize = minTrisBatch*minVertexSize; // under max vertex reuse allocParams.indexBuffSupportedCnt = 
(allocParams.vertexBuffSupportedByteSize/allocParams.vertexBufferMinAllocByteSize)*minIndicesBatch; allocParams.indexBufferMinAllocCnt = minIndicesBatch; allocParams.MDIDataBuffSupportedCnt = allocParams.indexBuffSupportedCnt/minIndicesBatch; @@ -404,7 +404,11 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh allocData.resize(meshBuffersToProcess.size()); - cpump->alloc(allocData.data(),meshBuffersToProcess.begin(),meshBuffersToProcess.end()); + if (!cpump->alloc(allocData.data(),meshBuffersToProcess.begin(),meshBuffersToProcess.end())) + { + printf("[ERROR] Failed to Allocate Mesh data in SSBOs, quitting!\n"); + exit(-42); + } cpump->shrinkOutputBuffersSize(); cpump->instantiateDataStorage(); From c504bbec1300bdcfa578bd608d6bb42296abdaf5 Mon Sep 17 00:00:00 2001 From: AnastaZIuk Date: Tue, 10 Dec 2024 15:20:57 +0100 Subject: [PATCH 13/41] update media submodule --- media | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/media b/media index 2b8e2a506..f9521cebc 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 2b8e2a50640597faaf9c6cfaaddeff321b8e41fa +Subproject commit f9521cebc4f12ad9fa6b7b4dd53f0a5305a9533b From a350a53a6da4a7c3f548e9c7737f3d6a0b9bd98b Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 11 Dec 2024 09:40:27 +0100 Subject: [PATCH 14/41] add a mask AOV, now need to configure the visibility of things for primary rays --- 22.RaytracedAO/Renderer.cpp | 54 +++++++++++++++++------------- 22.RaytracedAO/Renderer.h | 1 + 22.RaytracedAO/closestHit.comp | 1 + 22.RaytracedAO/raygen.comp | 2 ++ 22.RaytracedAO/raytraceCommon.glsl | 30 +++++++++++++++-- 22.RaytracedAO/resolve.comp | 12 ++++--- 6 files changed, 70 insertions(+), 30 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 813e85605..46181a9b5 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -117,7 +117,7 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, 
scene::I } { - constexpr auto raytracingCommonDescriptorCount = 10u; + constexpr auto raytracingCommonDescriptorCount = 11u; IGPUDescriptorSetLayout::SBinding bindings[raytracingCommonDescriptorCount]; fillIotaDescriptorBindingDeclarations(bindings,ISpecializedShader::ESS_COMPUTE,raytracingCommonDescriptorCount); bindings[0].type = asset::EDT_UNIFORM_BUFFER; @@ -127,9 +127,10 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I bindings[4].type = asset::EDT_STORAGE_BUFFER; bindings[5].type = asset::EDT_STORAGE_IMAGE; bindings[6].type = asset::EDT_STORAGE_IMAGE; - bindings[7].type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bindings[7].type = asset::EDT_STORAGE_IMAGE; bindings[8].type = asset::EDT_COMBINED_IMAGE_SAMPLER; bindings[9].type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bindings[10].type = asset::EDT_COMBINED_IMAGE_SAMPLER; m_commonRaytracingDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+raytracingCommonDescriptorCount); } @@ -158,7 +159,7 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_closestHitDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+closestHitDescriptorCount); } { - constexpr auto resolveDescriptorCount = 7u; + constexpr auto resolveDescriptorCount = 8u; IGPUDescriptorSetLayout::SBinding bindings[resolveDescriptorCount]; fillIotaDescriptorBindingDeclarations(bindings,ISpecializedShader::ESS_COMPUTE,resolveDescriptorCount); bindings[0].type = asset::EDT_UNIFORM_BUFFER; @@ -168,9 +169,11 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I bindings[2].samplers = &sampler; bindings[3].type = asset::EDT_COMBINED_IMAGE_SAMPLER; bindings[3].samplers = &sampler; - bindings[4].type = asset::EDT_STORAGE_IMAGE; + bindings[4].type = asset::EDT_COMBINED_IMAGE_SAMPLER; + bindings[4].samplers = &sampler; bindings[5].type = asset::EDT_STORAGE_IMAGE; bindings[6].type = asset::EDT_STORAGE_IMAGE; + bindings[7].type = 
asset::EDT_STORAGE_IMAGE; m_resolveDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+resolveDescriptorCount); } @@ -1395,11 +1398,12 @@ void Renderer::initScreenSizedResources( m_accumulation = createScreenSizedTexture(EF_R32G32_UINT,(cascadeCount+1u)*m_staticViewData.samplesPerPixelPerDispatch); // one more (first) layer because of accumulation metadata for a path m_albedoAcc = createScreenSizedTexture(EF_R32_UINT,m_staticViewData.samplesPerPixelPerDispatch); m_normalAcc = createScreenSizedTexture(EF_R32_UINT,m_staticViewData.samplesPerPixelPerDispatch); + m_maskAcc = createScreenSizedTexture(EF_R16_UNORM,m_staticViewData.samplesPerPixelPerDispatch); m_tonemapOutput = createScreenSizedTexture(EF_R16G16B16A16_SFLOAT); m_albedoRslv = createScreenSizedTexture(EF_A2B10G10R10_UNORM_PACK32); m_normalRslv = createScreenSizedTexture(EF_R16G16B16A16_SFLOAT); - constexpr uint32_t MaxDescritorUpdates = 10u; + constexpr uint32_t MaxDescritorUpdates = 11u; IGPUDescriptorSet::SDescriptorInfo infos[MaxDescritorUpdates]; IGPUDescriptorSet::SWriteDescriptorSet writes[MaxDescritorUpdates]; @@ -1416,20 +1420,21 @@ void Renderer::initScreenSizedResources( setImageInfo(infos+2,asset::EIL_GENERAL,core::smart_refctd_ptr(m_accumulation)); setImageInfo(infos+5,asset::EIL_GENERAL,core::smart_refctd_ptr(m_albedoAcc)); setImageInfo(infos+6,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalAcc)); + setImageInfo(infos+7,asset::EIL_GENERAL,core::smart_refctd_ptr(m_maskAcc)); // envmap { - setImageInfo(infos+7,asset::EIL_GENERAL,core::smart_refctd_ptr(m_finalEnvmap)); + setImageInfo(infos+8,asset::EIL_GENERAL,core::smart_refctd_ptr(m_finalEnvmap)); ISampler::SParams samplerParams = { ISampler::ETC_REPEAT, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - infos[7].image.sampler = m_driver->createGPUSampler(samplerParams); - 
infos[7].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; + infos[8].image.sampler = m_driver->createGPUSampler(samplerParams); + infos[8].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; } // warpmap { - setImageInfo(infos+8,asset::EIL_GENERAL,core::smart_refctd_ptr(warpMap)); + setImageInfo(infos+9,asset::EIL_GENERAL,core::smart_refctd_ptr(warpMap)); ISampler::SParams samplerParams = { ISampler::ETC_REPEAT, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - infos[8].image.sampler = m_driver->createGPUSampler(samplerParams); - infos[8].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; + infos[9].image.sampler = m_driver->createGPUSampler(samplerParams); + infos[9].image.imageLayout = EIL_SHADER_READ_ONLY_OPTIMAL; } IGPUDescriptorSet::SDescriptorInfo luminanceDescriptorInfo = {}; @@ -1449,7 +1454,7 @@ void Renderer::initScreenSizedResources( for (auto i=0u; i<2u; i++) m_commonRaytracingDS[i] = m_driver->createGPUDescriptorSet(core::smart_refctd_ptr(m_commonRaytracingDSLayout)); - constexpr auto descriptorUpdateCount = 10u; + constexpr auto descriptorUpdateCount = 11u; setDstSetAndDescTypesOnWrites(m_commonRaytracingDS[0].get(),writes,infos,{ EDT_UNIFORM_BUFFER, EDT_UNIFORM_TEXEL_BUFFER, @@ -1458,17 +1463,18 @@ void Renderer::initScreenSizedResources( EDT_STORAGE_BUFFER, EDT_STORAGE_IMAGE, EDT_STORAGE_IMAGE, + EDT_STORAGE_IMAGE, EDT_COMBINED_IMAGE_SAMPLER, EDT_COMBINED_IMAGE_SAMPLER, }); // Set last write - writes[9].binding = 9u; - writes[9].arrayElement = 0u; - writes[9].count = 1u; - writes[9].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; - writes[9].dstSet = m_commonRaytracingDS[0].get(); - writes[9].info = &luminanceDescriptorInfo; + writes[10].binding = 10u; // luminance sampler is declared at binding 10 now that maskAOV occupies binding 7 (envMap/warpMap moved to 8/9) + writes[10].arrayElement = 0u; + 
writes[10].count = 1u; + writes[10].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; + writes[10].dstSet = m_commonRaytracingDS[0].get(); +
writes[10].info = &luminanceDescriptorInfo; m_driver->updateDescriptorSets(descriptorUpdateCount,writes,0u,nullptr); // set up second DS @@ -1545,23 +1551,24 @@ void Renderer::initScreenSizedResources( } setImageInfo(infos+2,asset::EIL_GENERAL,std::move(albedoSamplerView)); setImageInfo(infos+3,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalAcc)); - setImageInfo(infos+4,asset::EIL_GENERAL,core::smart_refctd_ptr(m_tonemapOutput)); + setImageInfo(infos+4,asset::EIL_GENERAL,core::smart_refctd_ptr(m_maskAcc)); + setImageInfo(infos+5,asset::EIL_GENERAL,core::smart_refctd_ptr(m_tonemapOutput)); core::smart_refctd_ptr albedoStorageView; { IGPUImageView::SCreationParams viewparams = m_albedoRslv->getCreationParameters(); viewparams.format = EF_R32_UINT; albedoStorageView = m_driver->createGPUImageView(std::move(viewparams)); } - setImageInfo(infos+5,asset::EIL_GENERAL,std::move(albedoStorageView)); - setImageInfo(infos+6,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalRslv)); + setImageInfo(infos+6,asset::EIL_GENERAL,std::move(albedoStorageView)); + setImageInfo(infos+7,asset::EIL_GENERAL,core::smart_refctd_ptr(m_normalRslv)); setDstSetAndDescTypesOnWrites(m_resolveDS.get(),writes,infos,{ EDT_UNIFORM_BUFFER, - EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER, + EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER,EDT_COMBINED_IMAGE_SAMPLER, EDT_STORAGE_IMAGE,EDT_STORAGE_IMAGE,EDT_STORAGE_IMAGE }); } - m_driver->updateDescriptorSets(7u,writes,0u,nullptr); + m_driver->updateDescriptorSets(8u,writes,0u,nullptr); m_visibilityBuffer = m_driver->addFrameBuffer(); m_visibilityBuffer->attach(EFAP_DEPTH_ATTACHMENT,createScreenSizedTexture(EF_D32_SFLOAT)); @@ -1609,6 +1616,7 @@ void Renderer::deinitScreenSizedResources() m_accumulation = m_tonemapOutput = nullptr; m_albedoAcc = m_albedoRslv = nullptr; m_normalAcc = m_normalRslv = nullptr; + m_maskAcc = nullptr; glFinish(); diff --git a/22.RaytracedAO/Renderer.h 
b/22.RaytracedAO/Renderer.h index 912eadd7b..6a05eeb40 100644 --- a/22.RaytracedAO/Renderer.h +++ b/22.RaytracedAO/Renderer.h @@ -254,6 +254,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr m_accumulation,m_tonemapOutput; nbl::core::smart_refctd_ptr m_albedoAcc,m_albedoRslv; nbl::core::smart_refctd_ptr m_normalAcc,m_normalRslv; + nbl::core::smart_refctd_ptr m_maskAcc; nbl::video::IFrameBuffer* m_visibilityBuffer,* m_colorBuffer; // Resources used for envmap sampling diff --git a/22.RaytracedAO/closestHit.comp b/22.RaytracedAO/closestHit.comp index a801b48d3..05a30ae71 100644 --- a/22.RaytracedAO/closestHit.comp +++ b/22.RaytracedAO/closestHit.comp @@ -134,5 +134,6 @@ void main() // addAlbedo(contrib.albedo*aovThroughput,accumulationLocation); addWorldspaceNormal(contrib.worldspaceNormal*nbl_glsl_MC_colorToScalar(aovThroughput),accumulationLocation); + addMask(0.f,accumulationLocation); } } \ No newline at end of file diff --git a/22.RaytracedAO/raygen.comp b/22.RaytracedAO/raygen.comp index 71e6ff7f2..90ce0a1a5 100644 --- a/22.RaytracedAO/raygen.comp +++ b/22.RaytracedAO/raygen.comp @@ -132,11 +132,13 @@ void main() { storeAlbedo(contrib.albedo,coord); storeWorldspaceNormal(contrib.worldspaceNormal,coord); + storeMask(hit ? 0.f:1.f,coord); } else { addAlbedo(contrib.albedo,coord,pc.cummon.rcpFramesDispatched); addWorldspaceNormal(contrib.worldspaceNormal,coord,pc.cummon.rcpFramesDispatched); + addMask(hit ? 
0.f:1.f,coord,pc.cummon.rcpFramesDispatched); } } } diff --git a/22.RaytracedAO/raytraceCommon.glsl b/22.RaytracedAO/raytraceCommon.glsl index fe7fc667c..b13b4bb47 100644 --- a/22.RaytracedAO/raytraceCommon.glsl +++ b/22.RaytracedAO/raytraceCommon.glsl @@ -44,10 +44,11 @@ layout(set = 2, binding = 4) restrict coherent buffer RayCount // maybe remove c // aovs layout(set = 2, binding = 5, r32ui) restrict uniform uimage2DArray albedoAOV; layout(set = 2, binding = 6, r32ui) restrict uniform uimage2DArray normalAOV; +layout(set = 2, binding = 7, r16) restrict uniform image2DArray maskAOV; // environment emitter -layout(set = 2, binding = 7) uniform sampler2D envMap; -layout(set = 2, binding = 8) uniform sampler2D warpMap; -layout(set = 2, binding = 9) uniform sampler2D luminance; +layout(set = 2, binding = 8) uniform sampler2D envMap; +layout(set = 2, binding = 9) uniform sampler2D warpMap; +layout(set = 2, binding = 10) uniform sampler2D luminance; void clear_raycount() { @@ -174,6 +175,29 @@ void addWorldspaceNormal(vec3 delta, in uvec3 coord) impl_addWorldspaceNormal(delta,coord,0.f,false); } +void storeMask(in float mask, in uvec3 coord) +{ + imageStore(maskAOV,ivec3(coord),vec4(mask,0.f,0.f,0.f)); +} +void impl_addMask(float delta, in uvec3 coord, in float rcpN, in bool newSample) +{ + const float prev = imageLoad(maskAOV,ivec3(coord)).r; + if (newSample) + delta = (delta-prev)*rcpN; + if (abs(delta)>1.f/65536.f) + storeMask(prev+delta,coord); +} +// for starting a new sample +void addMask(float delta, in uvec3 coord, in float rcpN) +{ + impl_addMask(delta,coord,rcpN,true); +} +// for adding to the last sample +void addMask(float delta, in uvec3 coord) +{ + impl_addMask(delta,coord,0.f,false); +} + // due to memory limitations we can only do 6k renders // so that's 13 bits for width, 12 bits for height, which leaves us with 7 bits for throughput void packOutPixelLocationAndAoVThroughputFactor(out float val, in uvec2 outPixelLocation, in float aovThroughputFactor) 
diff --git a/22.RaytracedAO/resolve.comp b/22.RaytracedAO/resolve.comp index b46b0f725..c36b631c3 100644 --- a/22.RaytracedAO/resolve.comp +++ b/22.RaytracedAO/resolve.comp @@ -13,9 +13,10 @@ layout(set = 0, binding = 0, row_major) uniform StaticViewData layout(set = 0, binding = 1) uniform usampler2DArray colorSamples; layout(set = 0, binding = 2) uniform sampler2DArray albedoSamples; layout(set = 0, binding = 3) uniform usampler2DArray normalSamples; -layout(set = 0, binding = 4, rgba16f) restrict uniform image2D framebuffer; -layout(set = 0, binding = 5, r32ui) restrict uniform uimage2D albedo; -layout(set = 0, binding = 6, rgba16f) restrict uniform image2D normals; +layout(set = 0, binding = 4) uniform sampler2DArray maskSamples; +layout(set = 0, binding = 5, rgba16f) restrict uniform image2D framebuffer; +layout(set = 0, binding = 6, r32ui) restrict uniform uimage2D albedo; +layout(set = 0, binding = 7, rgba16f) restrict uniform image2D normals; layout(push_constant, row_major) uniform PushConstants { @@ -71,19 +72,22 @@ void main() vec3 acc = nbl_glsl_RWMC_reweight(pc.rwmcReweightingParams,pixelCoord); vec3 alb = texelFetch(albedoSamples,ivec3(pixelCoord,0),0).rgb; vec3 nml = nbl_glsl_decodeRGB10A2_SNORM(texelFetch(normalSamples,ivec3(pixelCoord,0),0).r).xyz; + float msk = texelFetch(maskSamples,ivec3(pixelCoord,0),0).r; for (uint i=1u; i Date: Wed, 11 Dec 2024 15:27:25 +0100 Subject: [PATCH 15/41] denoiser and bloom handle alpha now --- 39.DenoiserTonemapper/ShaderCommon.glsl | 12 +- 39.DenoiserTonemapper/main.cpp | 168 ++++++++++++------------ 2 files changed, 86 insertions(+), 94 deletions(-) diff --git a/39.DenoiserTonemapper/ShaderCommon.glsl b/39.DenoiserTonemapper/ShaderCommon.glsl index da7e08f1e..dd4bae7ba 100644 --- a/39.DenoiserTonemapper/ShaderCommon.glsl +++ b/39.DenoiserTonemapper/ShaderCommon.glsl @@ -37,13 +37,7 @@ uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType() #define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ 16384 -#define SHARED_CHANNELS 3 
-struct f16vec3_packed -{ - float16_t x; - float16_t y; - float16_t z; -}; +//#define SHARED_CHANNELS 3 // luma metering stuff @@ -91,10 +85,10 @@ struct f16vec3_packed return int((~pc.data.flags)&0x1u); } - vec3 globalPixelData; + vec4 globalPixelData; vec3 nbl_glsl_ext_LumaMeter_getColor(bool wgExecutionMask) { - return globalPixelData; + return globalPixelData.rgb; } #else #include "nbl/builtin/glsl/ext/LumaMeter/common.glsl" diff --git a/39.DenoiserTonemapper/main.cpp b/39.DenoiserTonemapper/main.cpp index e5ef6dde5..f91a9434a 100644 --- a/39.DenoiserTonemapper/main.cpp +++ b/39.DenoiserTonemapper/main.cpp @@ -80,7 +80,7 @@ int main(int argc, char* argv[]) params.Doublebuffer = true; params.Stencilbuffer = false; // TODO: this is a temporary fix for a problem solved in the Vulkan Branch - params.StreamingUploadBufferSize = 1024*1024*1024; // for Color + 2 AoV of 8k images + params.StreamingUploadBufferSize = (1024+512)*1024*1024; // for Color + 2 AoV of 8k images params.StreamingDownloadBufferSize = core::roundUp(params.StreamingUploadBufferSize/3u,256u); // for output image auto device = createDeviceEx(params); @@ -130,30 +130,10 @@ int main(int argc, char* argv[]) if (check_error(!m_optixContext, "Could not create Optix Context!")) return error_code; - constexpr auto forcedOptiXFormat = OPTIX_PIXEL_FORMAT_HALF3; // TODO: make more denoisers with formats - E_FORMAT nblFmtRequired = EF_UNKNOWN; - switch (forcedOptiXFormat) - { - case OPTIX_PIXEL_FORMAT_UCHAR3: - nblFmtRequired = EF_R8G8B8_SRGB; - break; - case OPTIX_PIXEL_FORMAT_UCHAR4: - nblFmtRequired = EF_R8G8B8A8_SRGB; - break; - case OPTIX_PIXEL_FORMAT_HALF3: - nblFmtRequired = EF_R16G16B16_SFLOAT; - break; - case OPTIX_PIXEL_FORMAT_HALF4: - nblFmtRequired = EF_R16G16B16A16_SFLOAT; - break; - case OPTIX_PIXEL_FORMAT_FLOAT3: - nblFmtRequired = EF_R32G32B32_SFLOAT; - break; - case OPTIX_PIXEL_FORMAT_FLOAT4: - nblFmtRequired = EF_R32G32B32A32_SFLOAT; - break; - } - constexpr auto forcedOptiXFormatPixelStride 
= 6u; + // TODO: make more denoisers with formats + constexpr OptixPixelFormat forcedOptiXFormats[] = {OPTIX_PIXEL_FORMAT_HALF4,OPTIX_PIXEL_FORMAT_HALF3,OPTIX_PIXEL_FORMAT_HALF3}; + const uint32_t forcedOptiXFormatPixelStrides[] = {8,6,6}; + const uint32_t forcedOptiXFormatPixelCumExclSizes[] = {0,8,14,20}; DenoiserToUse denoisers[EII_COUNT]; { OptixDenoiserOptions opts = { OPTIX_DENOISER_INPUT_RGB }; @@ -175,6 +155,7 @@ int main(int argc, char* argv[]) using ToneMapperClass = ext::ToneMapper::CToneMapper; constexpr uint32_t kComputeWGSize = FFTClass::DEFAULT_WORK_GROUP_SIZE; // if it changes, maybe it breaks stuff + constexpr uint32_t allChannelsFFT = 4u; constexpr uint32_t colorChannelsFFT = 3u; constexpr bool usingHalfFloatFFTStorage = false; @@ -344,18 +325,16 @@ layout(binding = 0, std430) restrict readonly buffer ImageInputBuffer } inBuffers[EII_COUNT]; layout(binding = 1, std430) restrict writeonly buffer ImageOutputBuffer { - f16vec3_packed data[]; + float16_t data[]; } outBuffers[EII_COUNT]; -vec3 fetchData(in uvec3 texCoord) +vec4 fetchData(in uvec3 texCoord) { - vec3 data = vec4(inBuffers[texCoord.z].data[texCoord.y*pc.data.inImageTexelPitch[texCoord.z]+texCoord.x]).xyz; - bool invalid = any(isnan(data))||any(isinf(abs(data))); + vec4 data = vec4(inBuffers[texCoord.z].data[texCoord.y*pc.data.inImageTexelPitch[texCoord.z]+texCoord.x]); + const bool invalid = any(isnan(data.rgb))||any(isinf(abs(data.rgb))); if (texCoord.z==EII_ALBEDO) - data = invalid ? vec3(1.0):data; + data.rgb = invalid ? vec3(1.0):data.rgb; else if (texCoord.z==EII_NORMAL) - { - data = invalid||length(data)<0.000000001 ? vec3(0.0,0.0,1.0):normalize(pc.data.normalMatrix*data); - } + data.xyz = invalid||length(data.xyz)<0.000000001 ? 
vec3(0.0,0.0,1.0):normalize(pc.data.normalMatrix*data.xyz); return data; } void main() @@ -367,10 +346,12 @@ void main() nbl_glsl_ext_LumaMeter(colorLayer && gl_GlobalInvocationID.xcreateGPUShader(core::make_smart_refctd_ptr(R"===( @@ -428,12 +409,12 @@ void main() #include "../ShaderCommon.glsl" layout(binding = 0, std430) restrict readonly buffer DenoisedImageInputBuffer { - f16vec3_packed inDenoisedBuffer[]; + uvec2 inDenoisedBuffer[]; }; #define _NBL_GLSL_EXT_FFT_INPUT_DESCRIPTOR_DEFINED_ layout(binding = 1, std430) restrict buffer NoisyImageInputBufferAndSpectrumOutputBuffer { - uint16_t data[]; + uvec2 data[]; } aliasedBuffer[2]; #define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_ @@ -466,12 +447,7 @@ uint nbl_glsl_ext_FFT_Parameters_t_getDirection() void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value) { const uint index = ((channel<>16); - aliasedBuffer[1].data[index*4+2] = uint16_t(asUint.y&0xffffu); - aliasedBuffer[1].data[index*4+3] = uint16_t(asUint.y>>16); + aliasedBuffer[1].data[index] = floatBitsToUint(complex_value); } #define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_ @@ -480,7 +456,7 @@ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_ #include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp" -vec3 preloadedPixels[(_NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_-1u)/_NBL_GLSL_WORKGROUP_SIZE_+1u]; +vec4 preloadedPixels[(_NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_-1u)/_NBL_GLSL_WORKGROUP_SIZE_+1u]; void main() { @@ -502,21 +478,25 @@ void main() ivec3 coordinate = oldCoord; nbl_glsl_ext_FFT_wrap_coord(coordinate); // const uint index = coordinate.y*pc.data.imageWidth+coordinate.x; - const vec3 denoised = vec3(inDenoisedBuffer[index].x,inDenoisedBuffer[index].y,inDenoisedBuffer[index].z); - vec3 noisy; - for (uint c=0; c<3; c++) - noisy[c] = unpackHalf2x16(uint(aliasedBuffer[0].data[index*3+c]))[0]; + const uvec2 denoisedData = inDenoisedBuffer[index]; + const vec4 denoised = 
vec4(unpackHalf2x16(denoisedData[0]),unpackHalf2x16(denoisedData[1])); + vec4 noisy; + { + uvec2 noisyData = aliasedBuffer[0].data[index]; + noisy.rg = unpackHalf2x16(noisyData[0]); + noisy.ba = unpackHalf2x16(noisyData[1]); // error "warning C7050: "noisy.zw" might be used before being initialized" is wrong + } preloadedPixels[t] = mix(denoised,noisy,pc.data.denoiseBlendFactor); // const bool contributesToLuma = all(equal(coordinate,oldCoord)); - scaledLogLuma += nbl_glsl_ext_LumaMeter_local_process(contributesToLuma,preloadedPixels[t]); + scaledLogLuma += nbl_glsl_ext_LumaMeter_local_process(contributesToLuma,preloadedPixels[t].rgb); } nbl_glsl_ext_LumaMeter_setFirstPassOutput(nbl_glsl_ext_LumaMeter_workgroup_process(scaledLogLuma)); // prevent overlap between different usages of shared memory barrier(); // Virtual Threads Calculation - for(uint channel=0u; channel<3u; channel++) + for(uint channel=0u; channel<4u; channel++) { for (uint t=0u; tgetAsset("../../media/kernels/physical_flare_512.exr",lp); // TODO: make it a builtins? - for (size_t i=0; i < inputFilesAmount; i++) + for (size_t i=0; igetRegions(); + // no mip chain, etc. 
assert(regions.begin()+1u==regions.end()); const auto& region = regions.begin()[0]; + // there is an explicit buffer row length assert(region.bufferRowLength); outParam.colorTexelSize = asset::getTexelOrBlockBytesize(colorCreationParams.format); } @@ -1028,6 +1018,8 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe { auto kerDim = outParam.kernel->getCreationParameters().extent; float kernelScale,minKernelScale; + // portrait vs landscape, get smallest dimension + // the kernelScale makes sure that resampled kernel resolution will match the image to be blurred scaled by `bloomRelativeScale` if (extent.width1.f) os::Printer::log(imageIDString + "Bloom Kernel loose sharpness, increase resolution of bloom kernel or reduce its relative scale!", ELL_WARNING); + // kernel cannot be smaller than 2x2 else if (kernelScale auto { auto tmp = extent; @@ -1058,14 +1052,16 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe } return tmp; }(); + // we abuse the same buffer as temporary storage for the Kernel FFT (two spans needed) fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,outParam.scaledKernelExtent,colorChannelsFFT)*2u,fftScratchSize); - fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,marginSrcDim,colorChannelsFFT),fftScratchSize); + // and for the main image FFT (alpha included) + fftScratchSize = core::max(FFTClass::getOutputBufferSize(usingHalfFloatFFTStorage,marginSrcDim,allChannelsFFT),fftScratchSize); // TODO: maybe move them to nested loop and compute JIT { auto* fftPushConstants = outParam.fftPushConstants; auto* fftDispatchInfo = outParam.fftDispatchInfo; const ISampler::E_TEXTURE_CLAMP fftPadding[2] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR}; - const auto passes = FFTClass::buildParameters(false,colorChannelsFFT,extent,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim); + const auto passes = 
FFTClass::buildParameters(false,allChannelsFFT,extent,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim); { // override for less work and storage (dont need to store the extra padding of the last axis after iFFT) fftPushConstants[1].output_strides.x = fftPushConstants[0].input_strides.x; @@ -1081,6 +1077,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe } fftDispatchInfo[2] = fftDispatchInfo[0]; } + // only a 2D FFT assert(passes==2); } @@ -1103,6 +1100,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe { os::Printer::log(imageIDString + "Image extent of the Albedo Channel does not match the Color Channel, Albedo Channel will not be used!", ELL_ERROR); albedoImage = nullptr; + continue; } else outParam.denoiserType = EII_ALBEDO; @@ -1144,7 +1142,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe size_t denoiserStateBufferSize = 0ull; { size_t scratchBufferSize = fftScratchSize; - size_t tempBufferSize = fftScratchSize; + size_t tempBufferSize = forcedOptiXFormatPixelCumExclSizes[EII_COUNT]*maxResolution[0]*maxResolution[1]; for (uint32_t i=0u; igetCreationParameters(); - assert(asset::getTexelOrBlockBytesize(creationParameters.format)==param.colorTexelSize); // set up some image pitch and offset info shaderConstants.inImageTexelPitch[j] = image->getRegions().begin()[0].bufferRowLength; inImageByteOffset[j] = offsetPair->getOffset(); @@ -1460,8 +1457,8 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe // always need at least two input noisy buffers due to having to keep noisy colour around for (uint32_t j=0u; jtileAndInvoke( m_cudaStream, @@ -1618,6 +1615,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe // image view core::smart_refctd_ptr imageView; + // size needed to download denoised, bloomed and tonemapped image const uint32_t colorBufferBytesize = 
param.height*param.width*param.colorTexelSize; { // create image @@ -1783,7 +1781,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe // convert to EF_R8G8B8_SRGB and save it as .png and .jpg { - auto newImageView = getConvertedImageView(imageView->getCreationParameters().image, EF_R8G8B8_SRGB); + auto newImageView = getConvertedImageView(imageView->getCreationParameters().image, EF_R8G8B8A8_SRGB); IAssetWriter::SAssetWriteParams wp(newImageView.get()); std::string fileName = outputFileBundle[i].value().c_str(); From 9df4dfbf92339c46f2ef4b9569ba8d01fddb9164 Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 11 Dec 2024 15:31:30 +0100 Subject: [PATCH 16/41] oops forgot to re-enable denoiser --- 39.DenoiserTonemapper/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/39.DenoiserTonemapper/main.cpp b/39.DenoiserTonemapper/main.cpp index f91a9434a..9864dec4d 100644 --- a/39.DenoiserTonemapper/main.cpp +++ b/39.DenoiserTonemapper/main.cpp @@ -1543,7 +1543,7 @@ nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(ivec3 coordinate, in uint channe denoiserOutput.pixelStrideInBytes = forcedOptiXFormatPixelStrides[0]; denoiserOutput.rowStrideInBytes = param.width * denoiserOutput.pixelStrideInBytes; denoiserOutput.format = forcedOptiXFormats[0]; -#if 0 // for easy debug with renderdoc disable optix stuff +#if 1 // for easy debug with renderdoc disable optix stuff //invoke if (denoiser.m_denoiser->tileAndInvoke( m_cudaStream, From b2ef17a766a19755be768d9c24b8a92afda48607 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 12 Dec 2024 09:09:19 +0100 Subject: [PATCH 17/41] get the transmission alpha working nicely --- 22.RaytracedAO/closestHit.comp | 13 ++++++++++++- 39.DenoiserTonemapper/main.cpp | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/22.RaytracedAO/closestHit.comp b/22.RaytracedAO/closestHit.comp index 05a30ae71..cc576a895 100644 --- a/22.RaytracedAO/closestHit.comp +++ 
b/22.RaytracedAO/closestHit.comp @@ -134,6 +134,17 @@ void main() // addAlbedo(contrib.albedo*aovThroughput,accumulationLocation); addWorldspaceNormal(contrib.worldspaceNormal*nbl_glsl_MC_colorToScalar(aovThroughput),accumulationLocation); - addMask(0.f,accumulationLocation); + // only misses contribute to transparency + float mask = 0.f; + if (!hit) + { + // make the luma of throughput dictate transparency + mask = dot(aovThroughput,transpose(nbl_glsl_sRGBtoXYZ)[1]); + // only count transmissions + const vec2 texCoordUV = (vec2(accumulationLocation.xy)+vec2(0.5))/vec2(getImageDimensions(staticViewData)); + const vec3 seeThroughDir = normalize(mat3(pc.cummon.viewDirReconFactors)*vec3(texCoordUV,1.f)); + mask *= pow(max(dot(normalizedV,seeThroughDir),0.f),1024.f); + } + addMask(mask,accumulationLocation); } } \ No newline at end of file diff --git a/39.DenoiserTonemapper/main.cpp b/39.DenoiserTonemapper/main.cpp index 9864dec4d..6b0d5d43f 100644 --- a/39.DenoiserTonemapper/main.cpp +++ b/39.DenoiserTonemapper/main.cpp @@ -587,6 +587,8 @@ void convolve(in uint item_per_thread_count, in uint ch) convSpectrum *= nbl_glsl_sRGBtoXYZ[2][1]; for (uint c=0; c<2; c++) convSpectrum += textureLod(NormalizedKernel[c],uv,0).xy*nbl_glsl_sRGBtoXYZ[c][1]; + // small boost because the spectra don't normalize ideally + convSpectrum *= 1.2f; } nbl_glsl_ext_FFT_impl_values[t] = nbl_glsl_complex_mul(sourceSpectrum,convSpectrum); } From 802e8de11d4062bbbcc041ca80788b47090529e6 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 12 Dec 2024 10:21:01 +0100 Subject: [PATCH 18/41] add `hideEnvironment` to Mitsuba XML --- 22.RaytracedAO/README.md | 8 ++++++++ 22.RaytracedAO/Renderer.cpp | 11 +++++++++-- 22.RaytracedAO/Renderer.h | 3 ++- 22.RaytracedAO/closestHit.comp | 21 ++++++++++++--------- 22.RaytracedAO/raygen.comp | 6 ++++-- 22.RaytracedAO/raytraceCommon.glsl | 2 +- 22.RaytracedAO/raytraceCommon.h | 4 +++- media | 2 +- 8 files changed, 40 insertions(+), 17 deletions(-) diff --git 
a/22.RaytracedAO/README.md b/22.RaytracedAO/README.md index 18d1c0e74..6ddb1097f 100644 --- a/22.RaytracedAO/README.md +++ b/22.RaytracedAO/README.md @@ -52,6 +52,14 @@ Multiple Sensor tags in mitsuba XML's is now supported. This feature helps you h You can switch between those sensors using `PAGE UP/DOWN` Keys defined in more detail below. +### Properties added to \: + +| Property Name | Description | Type | Default Value | +|-----------------|-------------------------------------------|---------|----------------| +| hideEnvironment | Replace background with Transparent Alpha | boolean | false | + +Note that we don't support Mitsuba's `hideEmitters` + ### Properties added to \: | Property Name | Description | Type | Default Value | diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 46181a9b5..93ee36fba 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -51,7 +51,7 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_rrManager(ext::RadeonRays::Manager::create(m_driver)), m_prevView(), m_prevCamTform(), m_sceneBound(FLT_MAX,FLT_MAX,FLT_MAX,-FLT_MAX,-FLT_MAX,-FLT_MAX), m_maxAreaLightLuma(0.f), m_framesDispatched(0u), m_rcpPixelSize{0.f,0.f}, - m_staticViewData{ {0u,0u},0u,0u,0u,0u,core::infinity(),{}}, m_raytraceCommonData{0.f,0u,0u,0u,core::matrix3x4SIMD()}, + m_staticViewData{ {0u,0u},0u,0u,0u,0u,false,core::infinity(),{}}, m_raytraceCommonData{0.f,0u,0u,0u,core::matrix3x4SIMD()}, m_indirectDrawBuffers{nullptr},m_cullPushConstants{core::matrix4SIMD(),1.f,0u,0u,0u},m_cullWorkGroups(0u), m_raygenWorkGroups{0u,0u},m_visibilityBuffer(nullptr),m_colorBuffer(nullptr), m_envMapImportanceSampling(_driver) @@ -215,8 +215,11 @@ Renderer::InitializationData Renderer::initSceneObjects(const SAssetBundle& mesh { case Enum::DIRECT: maxPathDepth = 2u; + hideEnvironment = integrator->direct.hideEnvironment; break; case Enum::PATH: + hideEnvironment = integrator->path.hideEnvironment; + 
[[fallthrough]]; case Enum::VOL_PATH_SIMPLE: case Enum::VOL_PATH: case Enum::BDPT: @@ -1149,6 +1152,7 @@ void Renderer::initSceneResources(SAssetBundle& meshes, nbl::io::path&& _sampleS } std::cout << "\tmaxPathDepth = " << maxPathDepth << std::endl; std::cout << "\tnoRussianRouletteDepth = " << noRussianRouletteDepth << std::endl; + std::cout << "\thideEnvironment = " << hideEnvironment << std::endl; std::cout << "\tmaxSamples = " << maxSensorSamples << std::endl; } } @@ -1188,7 +1192,7 @@ void Renderer::deinitSceneResources() m_finalEnvmap = nullptr; m_envMapImportanceSampling.deinitResources(); - m_staticViewData = {{0u,0u},0u,0u,0u,0u,core::infinity(),{}}; + m_staticViewData = {{0u,0u},0u,0u,0u,0u,false,core::infinity(),{}}; auto rr = m_rrManager->getRadeonRaysAPI(); rr->DetachAll(); @@ -1204,6 +1208,7 @@ void Renderer::deinitSceneResources() maxPathDepth = DefaultPathDepth; noRussianRouletteDepth = 5u; + hideEnvironment = false; maxSensorSamples = MaxFreeviewSamples; } @@ -1282,6 +1287,7 @@ void Renderer::initScreenSizedResources( { m_staticViewData.maxPathDepth = maxPathDepth; m_staticViewData.noRussianRouletteDepth = noRussianRouletteDepth; + m_staticViewData.hideEnvmap = hideEnvironment; uint32_t _maxRaysPerDispatch = 0u; auto setRayBufferSizes = [renderPixelCount,this,&_maxRaysPerDispatch,&raygenBufferSize,&intersectionBufferSize](uint32_t sampleMultiplier) -> void @@ -1646,6 +1652,7 @@ void Renderer::deinitScreenSizedResources() m_staticViewData.maxPathDepth = DefaultPathDepth; m_staticViewData.noRussianRouletteDepth = 5u; m_staticViewData.samplesPerPixelPerDispatch = 1u; + m_staticViewData.hideEnvmap = false; m_staticViewData.envMapPDFNormalizationFactor = core::infinity(); m_staticViewData.cascadeParams = {}; m_totalRaysCast = 0ull; diff --git a/22.RaytracedAO/Renderer.h b/22.RaytracedAO/Renderer.h index 6a05eeb40..aa09506ce 100644 --- a/22.RaytracedAO/Renderer.h +++ b/22.RaytracedAO/Renderer.h @@ -207,7 +207,8 @@ class Renderer : public 
nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr bufferView; } sampleSequence; uint16_t maxPathDepth; - uint16_t noRussianRouletteDepth; + uint16_t noRussianRouletteDepth : 15; + uint16_t hideEnvironment : 1; uint32_t maxSensorSamples; // scene specific data diff --git a/22.RaytracedAO/closestHit.comp b/22.RaytracedAO/closestHit.comp index cc576a895..ba0183b80 100644 --- a/22.RaytracedAO/closestHit.comp +++ b/22.RaytracedAO/closestHit.comp @@ -135,16 +135,19 @@ void main() addAlbedo(contrib.albedo*aovThroughput,accumulationLocation); addWorldspaceNormal(contrib.worldspaceNormal*nbl_glsl_MC_colorToScalar(aovThroughput),accumulationLocation); // only misses contribute to transparency - float mask = 0.f; - if (!hit) + if (bool(staticViewData.sampleSequenceStride_hideEnvmap>>31)) { - // make the luma of throughput dictate transparency - mask = dot(aovThroughput,transpose(nbl_glsl_sRGBtoXYZ)[1]); - // only count transmissions - const vec2 texCoordUV = (vec2(accumulationLocation.xy)+vec2(0.5))/vec2(getImageDimensions(staticViewData)); - const vec3 seeThroughDir = normalize(mat3(pc.cummon.viewDirReconFactors)*vec3(texCoordUV,1.f)); - mask *= pow(max(dot(normalizedV,seeThroughDir),0.f),1024.f); + float mask = 0.f; + if (!hit) + { + // make the luma of throughput dictate transparency + mask = dot(aovThroughput,transpose(nbl_glsl_sRGBtoXYZ)[1]); + // only count transmissions + const vec2 texCoordUV = (vec2(accumulationLocation.xy)+vec2(0.5))/vec2(getImageDimensions(staticViewData)); + const vec3 seeThroughDir = normalize(mat3(pc.cummon.viewDirReconFactors)*vec3(texCoordUV,1.f)); + mask *= pow(max(dot(normalizedV,seeThroughDir),0.f),1024.f); + } + addMask(mask,accumulationLocation); } - addMask(mask,accumulationLocation); } } \ No newline at end of file diff --git a/22.RaytracedAO/raygen.comp b/22.RaytracedAO/raygen.comp index 90ce0a1a5..e9ed177c7 100644 --- a/22.RaytracedAO/raygen.comp +++ b/22.RaytracedAO/raygen.comp @@ -127,18 +127,20 @@ 
void main() ); } + const bool hideEnvmap = bool(staticViewData.sampleSequenceStride_hideEnvmap>>31); // clear accumulations totally if beginning a new frame if (firstFrame) { storeAlbedo(contrib.albedo,coord); storeWorldspaceNormal(contrib.worldspaceNormal,coord); - storeMask(hit ? 0.f:1.f,coord); + storeMask(hideEnvmap&&(!hit) ? 1.f:0.f,coord); } else { addAlbedo(contrib.albedo,coord,pc.cummon.rcpFramesDispatched); addWorldspaceNormal(contrib.worldspaceNormal,coord,pc.cummon.rcpFramesDispatched); - addMask(hit ? 0.f:1.f,coord,pc.cummon.rcpFramesDispatched); + if (hideEnvmap) + addMask(hit ? 0.f:1.f,coord,pc.cummon.rcpFramesDispatched); } } } diff --git a/22.RaytracedAO/raytraceCommon.glsl b/22.RaytracedAO/raytraceCommon.glsl index b13b4bb47..77aa25eee 100644 --- a/22.RaytracedAO/raytraceCommon.glsl +++ b/22.RaytracedAO/raytraceCommon.glsl @@ -345,7 +345,7 @@ mat2x3 rand6d(in uvec3 scramble_keys[2], in int _sample, int depth) // decrement depth because first vertex is rasterized and picked with a different sample sequence --depth; // - const int offset = int(_sample*staticViewData.sampleSequenceStride)+depth*SAMPLING_STRATEGY_COUNT; + const int offset = int(_sample*staticViewData.sampleSequenceStride_hideEnvmap&0x7fFFffFFu)+depth*SAMPLING_STRATEGY_COUNT; const nbl_glsl_sampling_quantized3D quant1 = texelFetch(quantizedSampleSequence, offset).xy; const nbl_glsl_sampling_quantized3D quant2 = texelFetch(quantizedSampleSequence, offset+1).xy; diff --git a/22.RaytracedAO/raytraceCommon.h b/22.RaytracedAO/raytraceCommon.h index a070b2a94..595fc7198 100644 --- a/22.RaytracedAO/raytraceCommon.h +++ b/22.RaytracedAO/raytraceCommon.h @@ -98,11 +98,13 @@ struct StaticViewData_t uint8_t maxPathDepth; uint8_t noRussianRouletteDepth; uint16_t samplesPerPixelPerDispatch; + uint32_t sampleSequenceStride : 31; + uint32_t hideEnvmap : 1; #else uint imageDimensions; uint maxPathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch; + uint sampleSequenceStride_hideEnvmap; #endif - 
uint sampleSequenceStride; // this is a very small number actually, probably 20 bits left to play with float envMapPDFNormalizationFactor; nbl_glsl_RWMC_CascadeParameters cascadeParams; }; diff --git a/media b/media index f9521cebc..9ee99d1d1 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit f9521cebc4f12ad9fa6b7b4dd53f0a5305a9533b +Subproject commit 9ee99d1d1d5bb1d30232f206307ccea014905c2f From e051c3cba6dc7e42a61bf98fdb5b896b4fdfdd16 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 12 Dec 2024 15:47:38 +0100 Subject: [PATCH 19/41] envmap on wrong binding --- 22.RaytracedAO/Renderer.cpp | 2 +- 22.RaytracedAO/raytraceCommon.glsl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 93ee36fba..10d61f544 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -1475,7 +1475,7 @@ void Renderer::initScreenSizedResources( }); // Set last write - writes[10].binding = 9u; + writes[10].binding = 10u; writes[10].arrayElement = 0u; writes[10].count = 1u; writes[10].descriptorType = EDT_COMBINED_IMAGE_SAMPLER; diff --git a/22.RaytracedAO/raytraceCommon.glsl b/22.RaytracedAO/raytraceCommon.glsl index 77aa25eee..19aa0081d 100644 --- a/22.RaytracedAO/raytraceCommon.glsl +++ b/22.RaytracedAO/raytraceCommon.glsl @@ -345,7 +345,7 @@ mat2x3 rand6d(in uvec3 scramble_keys[2], in int _sample, int depth) // decrement depth because first vertex is rasterized and picked with a different sample sequence --depth; // - const int offset = int(_sample*staticViewData.sampleSequenceStride_hideEnvmap&0x7fFFffFFu)+depth*SAMPLING_STRATEGY_COUNT; + const int offset = _sample*int(staticViewData.sampleSequenceStride_hideEnvmap&0x7fFFffFFu)+depth*SAMPLING_STRATEGY_COUNT; const nbl_glsl_sampling_quantized3D quant1 = texelFetch(quantizedSampleSequence, offset).xy; const nbl_glsl_sampling_quantized3D quant2 = texelFetch(quantizedSampleSequence, offset+1).xy; From 
609ecb23e229674099d06a3f6896bd02d43dd85f Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 13 Dec 2024 10:23:17 +0100 Subject: [PATCH 20/41] they call me typo king --- 39.DenoiserTonemapper/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/39.DenoiserTonemapper/main.cpp b/39.DenoiserTonemapper/main.cpp index 6b0d5d43f..bcb3fe319 100644 --- a/39.DenoiserTonemapper/main.cpp +++ b/39.DenoiserTonemapper/main.cpp @@ -699,7 +699,7 @@ void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_ uint dataOffset = coords.y*pc.data.inImageTexelPitch[EII_COLOR]+coords.x; vec4 color = vec4(outBuffer[dataOffset]); color[channel] = complex_value.x; - if (channel==4) + if (channel==3) { color.rgb = _NBL_GLSL_EXT_LUMA_METER_XYZ_CONVERSION_MATRIX_DEFINED_*color.rgb; color.rgb *= intensity[pc.data.intensityBufferDWORDOffset]; // *= 0.18/AvgLuma From b988ed1cf74a0f43a6f5f8709149ea119c732276 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 13 Jan 2025 18:20:23 +0100 Subject: [PATCH 21/41] fix tangent frame handling for instances with a negative transformation determinant p.S. also two right were making an XOR wrong, no point storing `gl_FrontFacing` anywhere. 
--- 22.RaytracedAO/closestHit.comp | 5 +++-- 22.RaytracedAO/fillVisBuffer.frag | 2 +- 22.RaytracedAO/raygen.comp | 4 +++- 22.RaytracedAO/raytraceCommon.glsl | 4 ++++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/22.RaytracedAO/closestHit.comp b/22.RaytracedAO/closestHit.comp index ba0183b80..032fa4355 100644 --- a/22.RaytracedAO/closestHit.comp +++ b/22.RaytracedAO/closestHit.comp @@ -71,9 +71,10 @@ void main() // positions const vec3 lastVxPos = load_positions(batchInstanceData,indices); + if (!bool(batchInstanceData.determinantSignBit&0x80000000u)) + normalizedG = -normalizedG; + const bool frontfacing = dot(normalizedV,normalizedG)>=0.f; - const bool frontfacing = bool((batchInstanceData.determinantSignBit^floatBitsToUint(dot(normalizedV,normalizedG)))&0x80000000u); - // get material const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing); contrib.color = contrib.albedo = nbl_glsl_MC_oriented_material_t_getEmissive(material, normalizedV); diff --git a/22.RaytracedAO/fillVisBuffer.frag b/22.RaytracedAO/fillVisBuffer.frag index 9bce3dc26..88a18455a 100644 --- a/22.RaytracedAO/fillVisBuffer.frag +++ b/22.RaytracedAO/fillVisBuffer.frag @@ -29,7 +29,7 @@ void main() vec2 bary = nbl_glsl_barycentric_frag_get(); const int triangleIDBitcount = findMSB(MAX_TRIANGLES_IN_BATCH-1)+1; - frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[0] = bitfieldInsert(BackfacingBit_BatchInstanceGUID,gl_PrimitiveID,31-triangleIDBitcount,triangleIDBitcount)^(gl_FrontFacing ? 
0x0u:0x80000000u); + frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[0] = bitfieldInsert(BackfacingBit_BatchInstanceGUID,gl_PrimitiveID,31-triangleIDBitcount,triangleIDBitcount); frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[1] = packUnorm2x16(bary); frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[2] = packHalf2x16(dFdx(bary)); frontFacingTriangleIDDrawID_unorm16Bary_dBarydScreenHalf2x2[3] = packHalf2x16(dFdy(bary)); diff --git a/22.RaytracedAO/raygen.comp b/22.RaytracedAO/raygen.comp index e9ed177c7..f5886ef04 100644 --- a/22.RaytracedAO/raygen.comp +++ b/22.RaytracedAO/raygen.comp @@ -41,7 +41,6 @@ void main() if (hit) { // vis buffer decode - const bool frontfacing = !bool(visBuffer[0]&0x80000000u); const int triangleIDBitcount = findMSB(MAX_TRIANGLES_IN_BATCH-1)+1; const uint triangleID = bitfieldExtract(visBuffer[0],31-triangleIDBitcount,triangleIDBitcount); const uint batchInstanceGUID = bitfieldExtract(visBuffer[0],0,31-triangleIDBitcount); @@ -57,6 +56,9 @@ void main() // load vertex data const vec3 lastVxPos = load_positions(batchInstanceData,indices); + if (!bool(batchInstanceData.determinantSignBit&0x80000000u)) + normalizedG = -normalizedG; + const bool frontfacing = dot(normalizedV,normalizedG)>=0.f; // get material while waiting for indices const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing); diff --git a/22.RaytracedAO/raytraceCommon.glsl b/22.RaytracedAO/raytraceCommon.glsl index 19aa0081d..0d5f2dd27 100644 --- a/22.RaytracedAO/raytraceCommon.glsl +++ b/22.RaytracedAO/raytraceCommon.glsl @@ -303,6 +303,10 @@ vec3 load_normal_and_prefetch_textures( dUVdBary = mat2(uvs[0]-uvs[2],uvs[1]-uvs[2]); const vec2 UV = dUVdBary*compactBary+uvs[2]; + // flip the tangent frame if mesh got flipped to undo Left Handed tangent frame + if (!bool(batchInstanceData.determinantSignBit&0x80000000u)) + dUVdBary = -dUVdBary; + // the direction/winding of the 
UV-space parallelogram doesn't matter for texture filtering const mat2 dUVdScreen = nbl_glsl_applyChainRule2D(dUVdBary,dBarydScreen); nbl_glsl_MC_runTexPrefetchStream(tps,UV,dUVdScreen*pc.cummon.textureFootprintFactor); } From e507dc53d33a9846397972d1ecba721ebc20a1d5 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 13 Jan 2025 19:02:57 +0100 Subject: [PATCH 22/41] sentinel check on up-vectors was wrong (not taking into account normalization of directions) --- 22.RaytracedAO/main.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index 5c44b2396..6184fe0c0 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -874,7 +874,9 @@ int main(int argc, char** argv) staticCamera->setTarget(target.getAsVector3df()); } - if (core::dot(core::normalize(core::cross(staticCamera->getUpVector(),mainCamView)),core::cross(mainCamUp,mainCamView)).x<0.99f) + auto reconstructedRight = core::cross(staticCamera->getUpVector(),core::normalize(mainCamView)); + auto actualRight = core::cross(core::normalize(mainCamUp),core::normalize(mainCamView)); + if (core::dot(reconstructedRight,actualRight).x<0.99f) staticCamera->setUpVector(mainCamUp); // From 93bba7944ae14bd362e27b58c73896c321c9f5c0 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 14 Jan 2025 08:04:08 +0100 Subject: [PATCH 23/41] titlebar as requested by cutealien --- 22.RaytracedAO/main.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index 6184fe0c0..fab980420 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -1324,7 +1324,8 @@ int main(int argc, char** argv) auto samples = renderer->getTotalSamplesComputed(); auto rays = renderer->getTotalRaysCast(); const double microsecondsElapsed = std::chrono::duration_cast(std::chrono::steady_clock::now()-start).count(); - str << L"Raytraced Shadows Demo - Nabla Engine MegaSamples: " << samples/1000000ull + str << 
L"Nabla Path Tracer: " << applicationState.zipPath.c_str() << "\\" << applicationState.xmlPath.c_str() + << " MegaSamples: " << samples/1000000ull << " MSample/s: " << double(samples)/microsecondsElapsed << " MRay/s: " << double(rays)/microsecondsElapsed; From 6d6ac56357bae14cc91ae8693d766d269de8fe1c Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 14 Jan 2025 12:27:09 +0100 Subject: [PATCH 24/41] reduce sentinel --- 22.RaytracedAO/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index fab980420..adabe0696 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -876,7 +876,7 @@ int main(int argc, char** argv) auto reconstructedRight = core::cross(staticCamera->getUpVector(),core::normalize(mainCamView)); auto actualRight = core::cross(core::normalize(mainCamUp),core::normalize(mainCamView)); - if (core::dot(reconstructedRight,actualRight).x<0.99f) + if (core::dot(reconstructedRight,actualRight).x<0.97f) staticCamera->setUpVector(mainCamUp); // From 355a28327d66ca8b59090544b6fed62f0eca706d Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 30 Jan 2025 00:52:00 +0100 Subject: [PATCH 25/41] the reconstructed right vectors weren't normalized :facepalm: --- 22.RaytracedAO/main.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index adabe0696..d051f8e6f 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -753,6 +753,8 @@ int main(int argc, char** argv) auto tpose = core::transpose(core::matrix4SIMD(relativeTransform)); mainCamUp = tpose.rows[1]; mainCamView = tpose.rows[2]; + + std::cout << "\t Camera Reconstructed UpVector = <" << mainCamView.x << "," << mainCamView.y << "," << mainCamView.z << ">" << std::endl; } float realFoVDegrees; @@ -874,10 +876,14 @@ int main(int argc, char** argv) staticCamera->setTarget(target.getAsVector3df()); } - auto reconstructedRight = 
core::cross(staticCamera->getUpVector(),core::normalize(mainCamView)); - auto actualRight = core::cross(core::normalize(mainCamUp),core::normalize(mainCamView)); - if (core::dot(reconstructedRight,actualRight).x<0.97f) - staticCamera->setUpVector(mainCamUp); + { + auto reconstructedRight = core::normalize(core::cross(staticCamera->getUpVector(),mainCamView)); + auto actualRight = core::normalize(core::cross(mainCamUp,mainCamView)); + const float dp = core::dot(reconstructedRight,actualRight).x; + std::cout << "\t Camera Reconstructed UpVector match score = "<< dp << std::endl; + if (dp<0.96f) + staticCamera->setUpVector(mainCamUp); + } // if (ortho) From c88647f58b5840fadf97351dff4b15568413b582 Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 30 Jan 2025 01:17:07 +0100 Subject: [PATCH 26/41] now after the up vector compatibility fixes the upvector vs. view/target vector check was gone --- 22.RaytracedAO/main.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index d051f8e6f..6ee41b43e 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -754,7 +754,8 @@ int main(int argc, char** argv) mainCamUp = tpose.rows[1]; mainCamView = tpose.rows[2]; - std::cout << "\t Camera Reconstructed UpVector = <" << mainCamView.x << "," << mainCamView.y << "," << mainCamView.z << ">" << std::endl; + std::cout << "\t Camera Reconstructed UpVector = <" << mainCamUp.x << "," << mainCamUp.y << "," << mainCamUp.z << ">" << std::endl; + std::cout << "\t Camera Reconstructed Forward = <" << mainCamView.x << "," << mainCamView.y << "," << mainCamView.z << ">" << std::endl; } float realFoVDegrees; @@ -877,11 +878,14 @@ int main(int argc, char** argv) } { - auto reconstructedRight = core::normalize(core::cross(staticCamera->getUpVector(),mainCamView)); - auto actualRight = core::normalize(core::cross(mainCamUp,mainCamView)); - const float dp = core::dot(reconstructedRight,actualRight).x; + auto 
defaultUp = staticCamera->getUpVector(); + auto reconstructedRight = core::cross(defaultUp,mainCamView); + auto actualRight = core::cross(mainCamUp,mainCamView); + // special formulation avoiding multiple sqrt and inversesqrt to preserve precision + const float dp = core::dot(reconstructedRight,actualRight).x/core::sqrt((core::dot(reconstructedRight,reconstructedRight)*core::dot(actualRight,actualRight)).x); + const float pb = core::dot(defaultUp,mainCamView).x/core::sqrt((core::dot(defaultUp,defaultUp)*core::dot(mainCamView,mainCamView)).x); std::cout << "\t Camera Reconstructed UpVector match score = "<< dp << std::endl; - if (dp<0.96f) + if (dp<0.97f || dp>1.03f || abs(pb)>0.999f) staticCamera->setUpVector(mainCamUp); } From 60071d5edbb3804dbc1409698d621ec7318d9f8f Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 30 Jan 2025 13:31:45 +0100 Subject: [PATCH 27/41] update readme.md and treat new XML tag --- 22.RaytracedAO/README.md | 12 +++++++----- 22.RaytracedAO/main.cpp | 10 ++++++---- media | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/22.RaytracedAO/README.md b/22.RaytracedAO/README.md index 6ddb1097f..106612acd 100644 --- a/22.RaytracedAO/README.md +++ b/22.RaytracedAO/README.md @@ -62,11 +62,12 @@ Note that we don't support Mitsuba's `hideEmitters` ### Properties added to \: -| Property Name | Description | Type | Default Value | -|---------------|-----------------------|-------|------------------------------------------| -| moveSpeed | Camera Movement Speed | float | NaN -> Will be deduced from scene bounds | -| zoomSpeed | Camera Zoom Speed | float | NaN -> Will be deduced from scene bounds | -| rotateSpeed | Camera Rotation Speed | float | 300.0 | +| Property Name | Description | Type | Default Value | +|---------------|-------------------------------------|---------|------------------------------------------| +| up | Up Vector for roll around view axis | vector | 0.0, 1.0, 0.0 | +| moveSpeed | Camera Movement Speed | float | NaN 
-> Will be deduced from scene bounds | +| zoomSpeed | Camera Zoom Speed | float | NaN -> Will be deduced from scene bounds | +| rotateSpeed | Camera Rotation Speed | float | 300.0 | #### Properties added to \: @@ -101,6 +102,7 @@ Note that we don't support Mitsuba's `hideEmitters` ```xml + diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index 6ee41b43e..ba56c1cfe 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -878,14 +878,16 @@ int main(int argc, char** argv) } { - auto defaultUp = staticCamera->getUpVector(); - auto reconstructedRight = core::cross(defaultUp,mainCamView); + auto declaredUp = cameraBase->up; + auto reconstructedRight = core::cross(declaredUp,mainCamView); auto actualRight = core::cross(mainCamUp,mainCamView); // special formulation avoiding multiple sqrt and inversesqrt to preserve precision const float dp = core::dot(reconstructedRight,actualRight).x/core::sqrt((core::dot(reconstructedRight,reconstructedRight)*core::dot(actualRight,actualRight)).x); - const float pb = core::dot(defaultUp,mainCamView).x/core::sqrt((core::dot(defaultUp,defaultUp)*core::dot(mainCamView,mainCamView)).x); + const float pb = core::dot(declaredUp,mainCamView).x/core::sqrt((core::dot(declaredUp,declaredUp)*core::dot(mainCamView,mainCamView)).x); std::cout << "\t Camera Reconstructed UpVector match score = "<< dp << std::endl; - if (dp<0.97f || dp>1.03f || abs(pb)>0.999f) + if (dp>0.97f && dp<1.03f && abs(pb)<0.9996f) + staticCamera->setUpVector(declaredUp); + else staticCamera->setUpVector(mainCamUp); } diff --git a/media b/media index 9ee99d1d1..ad2cb3a9a 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit 9ee99d1d1d5bb1d30232f206307ccea014905c2f +Subproject commit ad2cb3a9a1655c5c4d0ffa1c515f710568f0487d From 59ef73e90ce5af00bc31b2c2f3dd25c34847d612 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 10 Feb 2025 09:06:02 +0100 Subject: [PATCH 28/41] pass the clipping planes to shader (but now we need to recompile the shader 
every time - TODO) --- 22.RaytracedAO/README.md | 15 +++++++++------ 22.RaytracedAO/Renderer.cpp | 6 +++++- 22.RaytracedAO/Renderer.h | 4 +++- 22.RaytracedAO/fillVisBuffer.vert | 23 +++++++++++++++++++++++ 22.RaytracedAO/main.cpp | 20 ++++++++++++++++---- 5 files changed, 56 insertions(+), 12 deletions(-) diff --git a/22.RaytracedAO/README.md b/22.RaytracedAO/README.md index 106612acd..bf3f9d230 100644 --- a/22.RaytracedAO/README.md +++ b/22.RaytracedAO/README.md @@ -62,12 +62,15 @@ Note that we don't support Mitsuba's `hideEmitters` ### Properties added to \: -| Property Name | Description | Type | Default Value | -|---------------|-------------------------------------|---------|------------------------------------------| -| up | Up Vector for roll around view axis | vector | 0.0, 1.0, 0.0 | -| moveSpeed | Camera Movement Speed | float | NaN -> Will be deduced from scene bounds | -| zoomSpeed | Camera Zoom Speed | float | NaN -> Will be deduced from scene bounds | -| rotateSpeed | Camera Rotation Speed | float | 300.0 | +| Property Name | Description | Type | Default Value | +|---------------|-------------------------------------------------------------------------------------|---------|------------------------------------------| +| up | Up Vector to determine roll around view axis and the north pole to rotate around | vector | 0.0, 1.0, 0.0 | +| moveSpeed | Camera Movement Speed | float | NaN -> Will be deduced from scene bounds | +| zoomSpeed | Camera Zoom Speed | float | NaN -> Will be deduced from scene bounds | +| rotateSpeed | Camera Rotation Speed | float | 300.0 | +| clipPlaneN\* | Worldspace coefficients for a plane equation of the form `a*x + b*y + c*z + w >= 0` | vector | NaN, NaN, NaN, NaN | + +\* N ranges from 0 to 5 #### Properties added to \: diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 10d61f544..650a4a962 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -1238,7 +1238,8 @@ void 
Renderer::initScreenSizedResources( int32_t cascadeCount, float cascadeLuminanceBase, float cascadeLuminanceStart, - const float Emin + const float Emin, + const nbl::core::vector& clipPlanes ) { float maxEmitterRadianceLuma; @@ -1315,6 +1316,9 @@ void Renderer::initScreenSizedResources( m_staticViewData.sampleSequenceStride = SampleSequence::computeQuantizedDimensions(maxPathDepth); auto stream = std::ofstream("runtime_defines.glsl"); + for (auto i=0; im_global.getVTStorageViewCount() << "\n" << m_globalMeta->m_global.m_materialCompilerGLSL_declarations << "#ifndef MAX_RAYS_GENERATED\n" diff --git a/22.RaytracedAO/Renderer.h b/22.RaytracedAO/Renderer.h index aa09506ce..bcfb653bd 100644 --- a/22.RaytracedAO/Renderer.h +++ b/22.RaytracedAO/Renderer.h @@ -18,6 +18,7 @@ #include #include #include +#include class Renderer : public nbl::core::IReferenceCounted, public nbl::core::InterfaceUnmovable { @@ -55,7 +56,8 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac int32_t cascadeCount, float cascadeLuminanceBase, float cascadeLuminanceStart, - const float Emin + const float Emin, + const nbl::core::vector& clipPlanes={} ); void deinitScreenSizedResources(); diff --git a/22.RaytracedAO/fillVisBuffer.vert b/22.RaytracedAO/fillVisBuffer.vert index 6c9279e02..eca6aa925 100644 --- a/22.RaytracedAO/fillVisBuffer.vert +++ b/22.RaytracedAO/fillVisBuffer.vert @@ -10,6 +10,8 @@ #define _NBL_GLSL_EXT_MITSUBA_LOADER_INSTANCE_DATA_BINDING_ 0 #include "virtualGeometry.glsl" +#include "runtime_defines.glsl" + layout(set=2, binding=0, row_major) readonly restrict buffer PerInstancePerCamera { DrawData_t data[]; @@ -31,4 +33,25 @@ void main() const vec3 modelPos = nbl_glsl_fetchVtxPos(gl_VertexIndex,InstData.data[batchInstanceGUID]); nbl_glsl_barycentric_vert_set(modelPos); gl_Position = nbl_glsl_pseudoMul4x4with3x1(self.MVP,modelPos); + + // clipping +#ifdef CLIP_PLANE_0 + const vec4 worldPos = 
vec4(nbl_glsl_pseudoMul3x4with3x1(InstData.data[batchInstanceGUID].tform,modelPos),1.0); + gl_ClipDistance[0] = dot(CLIP_PLANE_0,worldPos); +#ifdef CLIP_PLANE_1 + gl_ClipDistance[1] = dot(CLIP_PLANE_1,worldPos); +#ifdef CLIP_PLANE_2 + gl_ClipDistance[2] = dot(CLIP_PLANE_2,worldPos); +#ifdef CLIP_PLANE_3 + gl_ClipDistance[3] = dot(CLIP_PLANE_3,worldPos); +#ifdef CLIP_PLANE_4 + gl_ClipDistance[4] = dot(CLIP_PLANE_4,worldPos); +#ifdef CLIP_PLANE_5 + gl_ClipDistance[5] = dot(CLIP_PLANE_5,worldPos); +#endif +#endif +#endif +#endif +#endif +#endif } diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index ba56c1cfe..ec0025ab4 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -536,6 +536,7 @@ int main(int argc, char** argv) float Emin = 0.05f; bool envmap = false; float envmapRegFactor = 0.0f; + core::vector clipPlanes; scene::CSceneNodeAnimatorCameraModifiedMaya* getInteractiveCameraAnimator() { @@ -614,8 +615,8 @@ int main(int argc, char** argv) }; const bool shouldHaveSensorIdxInFileName = globalMeta->m_global.m_sensors.size() > 1; - std::vector sensors = std::vector(); - std::vector cubemapRenders = std::vector(); + std::vector sensors; + std::vector cubemapRenders; auto extractAndAddToSensorData = [&](const ext::MitsubaLoader::CElementSensor& sensor, uint32_t idx) -> bool { @@ -683,6 +684,17 @@ int main(int argc, char** argv) return false; } mainSensorData.type = sensor.type; + + for (auto i=0; iclipPlanes[i]; + if ((plane!=core::vectorSIMDf()).any()) + { + mainSensorData.clipPlanes.push_back(plane); + printf("Found Clip Plane %f,%f,%f,%f\n",plane); + } + } + mainSensorData.rotateSpeed = cameraBase->rotateSpeed; mainSensorData.stepZoomSpeed = cameraBase->zoomSpeed; mainSensorData.moveSpeed = cameraBase->moveSpeed; @@ -1073,7 +1085,7 @@ int main(int argc, char** argv) if(needsReinit) { renderer->deinitScreenSizedResources(); - 
renderer->initScreenSizedResources(sensor.width,sensor.height,sensor.envmapRegFactor,sensor.cascadeCount,sensor.cascadeLuminanceBase,sensor.cascadeLuminanceStart,sensor.Emin); + renderer->initScreenSizedResources(sensor.width,sensor.height,sensor.envmapRegFactor,sensor.cascadeCount,sensor.cascadeLuminanceBase,sensor.cascadeLuminanceStart,sensor.Emin,sensor.clipPlanes); } smgr->setActiveCamera(sensor.staticCamera); @@ -1208,7 +1220,7 @@ int main(int argc, char** argv) { renderer->deinitScreenSizedResources(); const auto& sensorData = sensors[activeSensor]; - renderer->initScreenSizedResources(sensorData.width,sensorData.height,sensorData.envmapRegFactor,sensorData.cascadeCount,sensorData.cascadeLuminanceBase,sensorData.cascadeLuminanceStart,sensorData.Emin); + renderer->initScreenSizedResources(sensorData.width,sensorData.height,sensorData.envmapRegFactor,sensorData.cascadeCount,sensorData.cascadeLuminanceBase,sensorData.cascadeLuminanceStart,sensorData.Emin,sensorData.clipPlanes); } smgr->setActiveCamera(sensors[activeSensor].interactiveCamera); From 3e6965be90cb29b6c3ca06cb5ae9880a10ba69c7 Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 10 Feb 2025 11:57:27 +0100 Subject: [PATCH 29/41] the vertex shader and graphics pipeline need to be made JIT now, also glEnable/glDisable GL_CLIP_DISTANCE --- 22.RaytracedAO/Renderer.cpp | 54 +++++++++++++++++++++++-------------- 22.RaytracedAO/Renderer.h | 1 + 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 650a4a962..39152931f 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -75,6 +75,9 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_littleDownloadBuffer->getBoundMemory()->mapMemoryRange(IDriverMemoryAllocation::EMCAF_READ,{0,sizeof(uint32_t)}); } + // no deferral for now + m_fragGPUShader = gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../fillVisBuffer.frag"); + // set up 
Visibility Buffer pipeline { IGPUDescriptorSetLayout::SBinding binding; @@ -98,23 +101,6 @@ Renderer::Renderer(IVideoDriver* _driver, IAssetManager* _assetManager, scene::I m_cullDSLayout = m_driver->createGPUDescriptorSetLayout(bindings,bindings+cullingDescriptorCount); } m_perCameraRasterDSLayout = core::smart_refctd_ptr(m_cullDSLayout); - { - core::smart_refctd_ptr shaders[] = {gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../fillVisBuffer.vert"),gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../fillVisBuffer.frag")}; - SPrimitiveAssemblyParams primitiveAssembly; - primitiveAssembly.primitiveType = EPT_TRIANGLE_LIST; - SRasterizationParams raster; - raster.faceCullingMode = EFCM_NONE; - auto _visibilityBufferFillPipelineLayout = m_driver->createGPUPipelineLayout( - nullptr,nullptr, - core::smart_refctd_ptr(m_rasterInstanceDataDSLayout), - core::smart_refctd_ptr(m_additionalGlobalDSLayout), - core::smart_refctd_ptr(m_cullDSLayout) - ); - m_visibilityBufferFillPipeline = m_driver->createGPURenderpassIndependentPipeline( - nullptr,std::move(_visibilityBufferFillPipelineLayout),&shaders->get(),&shaders->get()+2u, - SVertexInputParams{},SBlendParams{},primitiveAssembly,raster - ); - } { constexpr auto raytracingCommonDescriptorCount = 11u; @@ -1316,8 +1302,16 @@ void Renderer::initScreenSizedResources( m_staticViewData.sampleSequenceStride = SampleSequence::computeQuantizedDimensions(maxPathDepth); auto stream = std::ofstream("runtime_defines.glsl"); - for (auto i=0; im_global.getVTStorageViewCount() << "\n" << m_globalMeta->m_global.m_materialCompilerGLSL_declarations @@ -1335,6 +1329,9 @@ void Renderer::initScreenSizedResources( // cull m_cullGPUShader = gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../cull.comp"); + // visbuffer + m_vertGPUShader = gpuSpecializedShaderFromFile(m_assetManager, m_driver, "../fillVisBuffer.vert"); + // raygen m_raygenGPUShader = gpuSpecializedShaderFromFile(m_assetManager,m_driver,"../raygen.comp"); @@ -1877,7 
+1874,24 @@ bool Renderer::render(nbl::ITimer* timer, const float kappa, const float Emin, c bool compiledShaders = compileShadersFuture.get(); if(compiledShaders) { - m_cullPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_cullPipelineLayout), core::smart_refctd_ptr(m_cullGPUShader)); + m_cullPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_cullPipelineLayout), core::smart_refctd_ptr(m_cullGPUShader)); + { + IGPUSpecializedShader* shaders[] = {m_vertGPUShader.get(),m_fragGPUShader.get()}; + SPrimitiveAssemblyParams primitiveAssembly; + primitiveAssembly.primitiveType = EPT_TRIANGLE_LIST; + SRasterizationParams raster; + raster.faceCullingMode = EFCM_NONE; + auto _visibilityBufferFillPipelineLayout = m_driver->createGPUPipelineLayout( + nullptr,nullptr, + core::smart_refctd_ptr(m_rasterInstanceDataDSLayout), + core::smart_refctd_ptr(m_additionalGlobalDSLayout), + core::smart_refctd_ptr(m_cullDSLayout) + ); + m_visibilityBufferFillPipeline = m_driver->createGPURenderpassIndependentPipeline( + nullptr,std::move(_visibilityBufferFillPipelineLayout),shaders,shaders+2u, + SVertexInputParams{},SBlendParams{},primitiveAssembly,raster + ); + } m_raygenPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_raygenPipelineLayout), core::smart_refctd_ptr(m_raygenGPUShader)); m_closestHitPipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_closestHitPipelineLayout), core::smart_refctd_ptr(m_closestHitGPUShader)); m_resolvePipeline = m_driver->createGPUComputePipeline(nullptr,core::smart_refctd_ptr(m_resolvePipelineLayout), core::smart_refctd_ptr(m_resolveGPUShader)); diff --git a/22.RaytracedAO/Renderer.h b/22.RaytracedAO/Renderer.h index bcfb653bd..455a01862 100644 --- a/22.RaytracedAO/Renderer.h +++ b/22.RaytracedAO/Renderer.h @@ -182,6 +182,7 @@ class Renderer : public nbl::core::IReferenceCounted, public nbl::core::Interfac nbl::core::smart_refctd_ptr 
m_resolvePipelineLayout; nbl::core::smart_refctd_ptr m_cullGPUShader; + nbl::core::smart_refctd_ptr m_vertGPUShader,m_fragGPUShader; nbl::core::smart_refctd_ptr m_raygenGPUShader; nbl::core::smart_refctd_ptr m_closestHitGPUShader; nbl::core::smart_refctd_ptr m_resolveGPUShader; From 802cc7442d1136295a6575b5afa0ca7681d033bf Mon Sep 17 00:00:00 2001 From: devsh Date: Mon, 10 Feb 2025 12:06:58 +0100 Subject: [PATCH 30/41] update readme.md and remove dependency (this branch is not c++20 yet) --- 22.RaytracedAO/README.md | 2 +- 22.RaytracedAO/Renderer.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/22.RaytracedAO/README.md b/22.RaytracedAO/README.md index bf3f9d230..03bec2bc1 100644 --- a/22.RaytracedAO/README.md +++ b/22.RaytracedAO/README.md @@ -68,7 +68,7 @@ Note that we don't support Mitsuba's `hideEmitters` | moveSpeed | Camera Movement Speed | float | NaN -> Will be deduced from scene bounds | | zoomSpeed | Camera Zoom Speed | float | NaN -> Will be deduced from scene bounds | | rotateSpeed | Camera Rotation Speed | float | 300.0 | -| clipPlaneN\* | Worldspace coefficients for a plane equation of the form `a*x + b*y + c*z + w >= 0` | vector | NaN, NaN, NaN, NaN | +| clipPlaneN\* | Worldspace coefficients for a plane equation of the form `a*x + b*y + c*z + w >= 0` | vector | 0.0, 0.0, 0.0, 0.0 (disabled) | \* N ranges from 0 to 5 diff --git a/22.RaytracedAO/Renderer.h b/22.RaytracedAO/Renderer.h index 455a01862..5c8e45738 100644 --- a/22.RaytracedAO/Renderer.h +++ b/22.RaytracedAO/Renderer.h @@ -18,7 +18,6 @@ #include #include #include -#include class Renderer : public nbl::core::IReferenceCounted, public nbl::core::InterfaceUnmovable { From ae6d5732607002f91287ea863e7b6a3fc9247c5b Mon Sep 17 00:00:00 2001 From: devsh Date: Wed, 19 Feb 2025 17:44:35 +0100 Subject: [PATCH 31/41] fix clip plane logging --- 22.RaytracedAO/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp 
index ec0025ab4..31c098435 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -691,7 +691,7 @@ int main(int argc, char** argv) if ((plane!=core::vectorSIMDf()).any()) { mainSensorData.clipPlanes.push_back(plane); - printf("Found Clip Plane %f,%f,%f,%f\n",plane); + printf("Found Clip Plane %f,%f,%f,%f\n",plane[0],plane[1],plane[2],plane[3]); } } From 63cdca6cbe4a10f68e1130ff994c56992f5def05 Mon Sep 17 00:00:00 2001 From: devsh Date: Sun, 25 May 2025 12:39:05 +0200 Subject: [PATCH 32/41] if it works, its not stupid --- 22.RaytracedAO/main.cpp | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index 31c098435..c6ffbd84b 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -60,6 +60,9 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver case ReloadKey: reloadKeyPressed = true; break; + case OverloadCameraKey: + overloadCameraKeyPressed = true; + break; case QuitKey: running = false; return true; @@ -89,6 +92,8 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver inline bool isReloadKeyPressed() const { return reloadKeyPressed; } + inline bool isOverloadCameraKeyPressed() const { return overloadCameraKeyPressed; } + inline void resetKeys() { skipKeyPressed = false; @@ -98,6 +103,7 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver screenshotKeyPressed = false; logProgressKeyPressed = false; reloadKeyPressed = false; + overloadCameraKeyPressed = false; } private: @@ -110,6 +116,7 @@ class RaytracerExampleEventReceiver : public nbl::IEventReceiver static constexpr nbl::EKEY_CODE LogProgressKey = nbl::KEY_KEY_L; static constexpr nbl::EKEY_CODE BeautyKey = nbl::KEY_KEY_B; static constexpr nbl::EKEY_CODE ReloadKey = nbl::KEY_F5; + static constexpr nbl::EKEY_CODE OverloadCameraKey = nbl::KEY_KEY_C; bool running; bool renderingBeauty; @@ -121,6 +128,7 @@ class RaytracerExampleEventReceiver : 
public nbl::IEventReceiver bool screenshotKeyPressed; bool logProgressKeyPressed; bool reloadKeyPressed; + bool overloadCameraKeyPressed; }; struct PersistentState @@ -1243,12 +1251,39 @@ int main(int argc, char** argv) sensors[activeSensor].resetInteractiveCamera(); std::cout << "Interactive Camera Position and Target has been Reset." << std::endl; } - if(receiver.isNextPressed()) + else if(receiver.isOverloadCameraKeyPressed()) + { + pfd::open_file file("Choose XML file to overload camera with (only first sensor overrides)", "../../media/mitsuba", { "XML files (.xml)", "*.xml" }); + if (!file.result().empty()) + { + const auto filePath = file.result()[0]; + using namespace nbl::asset; + smart_refctd_ptr mitsubaMetadata; + { + static const IAssetLoader::SAssetLoadParams mitsubaLoaderParams = { 0, nullptr, IAssetLoader::ECF_DONT_CACHE_REFERENCES, nullptr, IAssetLoader::ELPF_LOAD_METADATA_ONLY }; + auto meshes_bundle = device->getAssetManager()->getAsset(filePath.data(),mitsubaLoaderParams); + if (!meshes_bundle.getContents().empty()) + mitsubaMetadata = smart_refctd_ptr(static_cast(meshes_bundle.getMetadata())); + } + if (!mitsubaMetadata || mitsubaMetadata->m_global.m_sensors.empty()) + os::Printer::log("ERROR (" + std::to_string(__LINE__) + " line): The xml file is invalid/cannot be loaded! 
File path: " + filePath, ELL_ERROR); + else + { + const uint32_t originalSensorCount = sensors.size(); + uint32_t idx = originalSensorCount; + for (const auto& sensor : mitsubaMetadata->m_global.m_sensors) + extractAndAddToSensorData(sensor,idx++); + setActiveSensor(originalSensorCount); + } + writeLastRunState = true; + } + } + else if(receiver.isNextPressed()) { setActiveSensor(activeSensor + 1); writeLastRunState = true; } - if(receiver.isPreviousPressed()) + else if(receiver.isPreviousPressed()) { setActiveSensor(activeSensor - 1); writeLastRunState = true; From a1938f1326fc156c5c3228b70929970612f74456 Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 11 Jul 2025 10:55:21 +0200 Subject: [PATCH 33/41] add options for recomputing `origin` at higher precision than from barycentrics --- 22.RaytracedAO/raygen.comp | 39 ++++++++++++++++++++++++------ 22.RaytracedAO/raytraceCommon.glsl | 4 +-- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/22.RaytracedAO/raygen.comp b/22.RaytracedAO/raygen.comp index f5886ef04..f2f0b32cb 100644 --- a/22.RaytracedAO/raygen.comp +++ b/22.RaytracedAO/raygen.comp @@ -44,7 +44,7 @@ void main() const int triangleIDBitcount = findMSB(MAX_TRIANGLES_IN_BATCH-1)+1; const uint triangleID = bitfieldExtract(visBuffer[0],31-triangleIDBitcount,triangleIDBitcount); const uint batchInstanceGUID = bitfieldExtract(visBuffer[0],0,31-triangleIDBitcount); - const vec2 compactBary = unpackUnorm2x16(visBuffer[1]); +//const vec2 compactBary = unpackUnorm2x16(visBuffer[1]); #ifdef TEX_PREFETCH_STREAM // TODO: separate pipeline and separate out the barycentric derivative FBO attachment, only write if need to, only fetch if `needs_texture_prefetch` const mat2 dBarydScreen = mat2(unpackHalf2x16(visBuffer[2]),unpackHalf2x16(visBuffer[3])); @@ -58,18 +58,45 @@ void main() const vec3 lastVxPos = load_positions(batchInstanceData,indices); if (!bool(batchInstanceData.determinantSignBit&0x80000000u)) normalizedG = -normalizedG; - const bool frontfacing = 
dot(normalizedV,normalizedG)>=0.f; + const float VdotG = dot(normalizedV,normalizedG); + const bool frontfacing = VdotG>=0.f; - // get material while waiting for indices + // get material const nbl_glsl_MC_oriented_material_t material = nbl_glsl_MC_material_data_t_getOriented(batchInstanceData.material,frontfacing); contrib.color = contrib.albedo = nbl_glsl_MC_oriented_material_t_getEmissive(material,normalizedV); // little optimization for non-twosided materials if (material.genchoice_count!=0u) { - // get initial scramble key while waiting for vertex positions + // get initial scramble key const nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,ivec2(outPixelLocation),0).rg; + vec3 origin; + #if RECOMPUTE_BARY + // we know the ray will intersect the triangle + vec2 compactBary; + { + // reversed order of arguments for each cross cause V is negative + const vec3 ray_cross_e2 = cross(dPdBary[1],normalizedV); + const float detRcp = 1.f/dot(dPdBary[0],ray_cross_e2); + // assert(!isinf(detRcp)); + const vec3 s = (pc.cummon.viewDirReconFactors[3]-lastVxPos)*detRcp; + const float u = dot(s,ray_cross_e2); + // assert(0.f<=u && u<=1.f) + const vec3 s_cross_e1 = cross(s,dPdBary[0]); + const float v = -dot(normalizedV,s_cross_e1); + // assert(0.f<=v && v<=1.f) + compactBary = vec2(u,v); + // + const float t = dot(dPdBary[1],s_cross_e1); + //assert(t>0.f); + origin = pc.cummon.viewDirReconFactors[3]-normalizedV*t; + } + #else + const vec2 compactBary = unpackUnorm2x16(visBuffer[1]); + #endif + origin = dPdBary*compactBary+lastVxPos; + // normalizedN = load_normal_and_prefetch_textures( batchInstanceData,indices,compactBary,material @@ -77,10 +104,6 @@ void main() ,dBarydScreen #endif ); - - const vec3 origin = dPdBary*compactBary+lastVxPos; - // does this buy us any precision? (answer run CI!) 
- //normalizedV = normalize(pc.cummon.viewDirReconFactors[3]-origin); // generate rays const uint vertex_depth = 1u; diff --git a/22.RaytracedAO/raytraceCommon.glsl b/22.RaytracedAO/raytraceCommon.glsl index 0d5f2dd27..85f851845 100644 --- a/22.RaytracedAO/raytraceCommon.glsl +++ b/22.RaytracedAO/raytraceCommon.glsl @@ -536,8 +536,7 @@ uint generate_next_rays( // the 1.03125f adjusts for the fact that the normal might be too short (inversesqrt precision) const float inversesqrt_precision = 1.03125f; - // TODO: investigate why we can't use `normalizedN` here - const vec3 ray_offset_vector = normalize(cross(dPdBary[0],dPdBary[1]))*inversesqrt_precision; + const vec3 ray_offset_vector = normalizedG*inversesqrt_precision; float origin_offset = nbl_glsl_numeric_limits_float_epsilon(120u); // I pulled the constants out of my @$$ origin_offset += dot(abs(ray_offset_vector),abs(origin))*nbl_glsl_numeric_limits_float_epsilon(128u); @@ -551,6 +550,7 @@ uint generate_next_rays( //const vec3 geomNormal = cross(dPdBary[0],dPdBary[1]); //float ray_offset = ?; //ray_offset = nbl_glsl_ieee754_next_ulp_away_from_zero(ray_offset); + const vec3 ray_offset = ray_offset_vector*origin_offset; const vec3 ray_origin[2] = {origin+ray_offset,origin-ray_offset}; uint offset = 0u; From ef000efbe363ef7133263ceb4e49bea0da068423 Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 11 Jul 2025 13:33:13 +0200 Subject: [PATCH 34/41] add a better RWMC=OFF mode, don't try to reuse RWMC code with 2 weird cascades --- 22.RaytracedAO/Renderer.cpp | 5 +-- 22.RaytracedAO/closestHit.comp | 29 ++++++++++------- 22.RaytracedAO/raygen.comp | 52 +++++++++++++++++++----------- 22.RaytracedAO/raytraceCommon.glsl | 5 +++ 22.RaytracedAO/resolve.comp | 11 ++++++- 5 files changed, 68 insertions(+), 34 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 39152931f..4a602fcd3 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -1240,7 +1240,8 @@ void 
Renderer::initScreenSizedResources( const float RGB19E7_MaxLuma = std::exp2(63.f); if (cascadeCountstd::numeric_limits::min(); if (core::isnan(cascadeLuminanceStart)) cascadeLuminanceStart = baseIsKnown ? (maxEmitterRadianceLuma*std::pow(cascadeLuminanceBase,-cascadeSegmentCount)):Emin; diff --git a/22.RaytracedAO/closestHit.comp b/22.RaytracedAO/closestHit.comp index 032fa4355..fea8483c2 100644 --- a/22.RaytracedAO/closestHit.comp +++ b/22.RaytracedAO/closestHit.comp @@ -118,20 +118,25 @@ void main() contrib.color *= throughput; const vec3 aovThroughput = throughput*aovThroughputScale; // - const bool pathToBeContinued = bool(rayMask); - if (pathToBeContinued) - addAccumulation(contrib.color,accumulationLocation); - else + if (isRWMCEnabled()) { - // need whole path throughput when splatting - contrib.color += fetchAccumulation(accumulationLocation); - const nbl_glsl_RWMC_SplattingParameters splat = nbl_glsl_RWMC_getCascade(staticViewData.cascadeParams,nbl_glsl_MC_colorToScalar(contrib.color)/pc.cummon.rcpFramesDispatched); - for (uint j=0u; j<2u; j++) - addAccumulationCascade( - contrib.color*splat.cascadeWeights[j],accumulationLocation, - samplesPerPixelPerDispatch,splat.lowerCascade+j - ); + const bool pathToBeContinued = bool(rayMask); + if (pathToBeContinued) + addAccumulation(contrib.color,accumulationLocation); + else + { + // need whole path throughput when splatting + contrib.color += fetchAccumulation(accumulationLocation); + const nbl_glsl_RWMC_SplattingParameters splat = nbl_glsl_RWMC_getCascade(staticViewData.cascadeParams,nbl_glsl_MC_colorToScalar(contrib.color)/pc.cummon.rcpFramesDispatched); + for (uint j=0u; j<2u; j++) + addAccumulationCascade( + contrib.color*splat.cascadeWeights[j],accumulationLocation, + samplesPerPixelPerDispatch,splat.lowerCascade+j + ); + } } + else + addAccumulation(contrib.color,accumulationLocation); // addAlbedo(contrib.albedo*aovThroughput,accumulationLocation); 
addWorldspaceNormal(contrib.worldspaceNormal*nbl_glsl_MC_colorToScalar(aovThroughput),accumulationLocation); diff --git a/22.RaytracedAO/raygen.comp b/22.RaytracedAO/raygen.comp index f2f0b32cb..bb16337c2 100644 --- a/22.RaytracedAO/raygen.comp +++ b/22.RaytracedAO/raygen.comp @@ -130,38 +130,52 @@ void main() { const uvec3 coord = uvec3(outPixelLocation,i); - nbl_glsl_RWMC_SplattingParameters splat = nbl_glsl_RWMC_getCascade(staticViewData.cascadeParams,luma); - const bool pathToBeContinued = bool((rayMask>>i)&0x1u); - if (pathToBeContinued) + if (isRWMCEnabled()) { - storeAccumulation(contrib.color*pc.cummon.rcpFramesDispatched,coord); - splat.cascadeWeights = vec2(0.f,0.f); - } - const uint higherCascade = splat.lowerCascade+1u; - const uint cascadeCount = staticViewData.cascadeParams.penultimateCascadeIx+2u; - for (uint cascadeIx=0u; cascadeIx>i)&0x1u); + if (pathToBeContinued) + { + storeAccumulation(contrib.color*pc.cummon.rcpFramesDispatched,coord); + splat.cascadeWeights = vec2(0.f,0.f); + } + + const uint higherCascade = splat.lowerCascade+1u; + const uint cascadeCount = staticViewData.cascadeParams.penultimateCascadeIx+2u; + for (uint cascadeIx=0u; cascadeIx>31); // clear accumulations totally if beginning a new frame if (firstFrame) { + storeAccumulation(contrib.color,coord); storeAlbedo(contrib.albedo,coord); storeWorldspaceNormal(contrib.worldspaceNormal,coord); storeMask(hideEnvmap&&(!hit) ? 
1.f:0.f,coord); } else { + if (!isRWMCEnabled()) + { + const vec3 prev = fetchAccumulation(coord); + const vec3 delta = (contrib.color-prev)*pc.cummon.rcpFramesDispatched; + if (any(greaterThan(delta,vec3(exp2(-19.f))))) + storeAccumulation(prev+delta,coord); + } addAlbedo(contrib.albedo,coord,pc.cummon.rcpFramesDispatched); addWorldspaceNormal(contrib.worldspaceNormal,coord,pc.cummon.rcpFramesDispatched); if (hideEnvmap) diff --git a/22.RaytracedAO/raytraceCommon.glsl b/22.RaytracedAO/raytraceCommon.glsl index 85f851845..b7c6e8b23 100644 --- a/22.RaytracedAO/raytraceCommon.glsl +++ b/22.RaytracedAO/raytraceCommon.glsl @@ -74,6 +74,11 @@ uvec3 get_triangle_indices(in nbl_glsl_ext_Mitsuba_Loader_instance_data_t batchI #include #include +bool isRWMCEnabled() +{ + return staticViewData.cascadeParams.penultimateCascadeIx!=uint(-2); +} + vec3 fetchAccumulation(in uvec3 coord) { const uvec2 data = imageLoad(accumulation,ivec3(coord)).rg; diff --git a/22.RaytracedAO/resolve.comp b/22.RaytracedAO/resolve.comp index c36b631c3..33541d08a 100644 --- a/22.RaytracedAO/resolve.comp +++ b/22.RaytracedAO/resolve.comp @@ -61,6 +61,10 @@ vec3 nbl_glsl_RWMC_sampleCascadeTexel(ivec2 coord, in ivec2 offset, in uint casc return value/float(samplesPerPixelPerDispatch); } +bool isRWMCEnabled() +{ + return staticViewData.cascadeParams.penultimateCascadeIx!=uint(-2); +} void main() { @@ -69,7 +73,12 @@ void main() { samplesPerPixelPerDispatch = bitfieldExtract(staticViewData.maxPathDepth_noRussianRouletteDepth_samplesPerPixelPerDispatch,16,16); - vec3 acc = nbl_glsl_RWMC_reweight(pc.rwmcReweightingParams,pixelCoord); + vec3 acc; + if (isRWMCEnabled()) + acc = nbl_glsl_RWMC_reweight(pc.rwmcReweightingParams,pixelCoord); + else // its a pretty ok function, reusing it + acc = nbl_glsl_RWMC_sampleCascadeTexel(pixelCoord,ivec2(0,0),-1); + vec3 alb = texelFetch(albedoSamples,ivec3(pixelCoord,0),0).rgb; vec3 nml = nbl_glsl_decodeRGB10A2_SNORM(texelFetch(normalSamples,ivec3(pixelCoord,0),0).r).xyz; 
float msk = texelFetch(maskSamples,ivec3(pixelCoord,0),0).r; From d945f382ccb81d793bb161b3d3fee270f88e9868 Mon Sep 17 00:00:00 2001 From: devsh Date: Fri, 11 Jul 2025 16:59:36 +0200 Subject: [PATCH 35/41] mother of all typos and premature optimizations --- 22.RaytracedAO/Renderer.cpp | 12 +++++++++++- 22.RaytracedAO/raygen.comp | 2 +- 22.RaytracedAO/raytraceCommon.glsl | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 4a602fcd3..2bbd99905 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -1378,7 +1378,7 @@ void Renderer::initScreenSizedResources( if (static_cast(m_driver)->runningInRenderdoc()) // makes Renderdoc capture the modifications done by OpenCL { interopBuffer.buffer = m_driver->createUpStreamingGPUBufferOnDedMem(size); - //interopBuffer.buffer->getBoundMemory()->mapMemoryRange(IDriverMemoryAllocation::EMCAF_WRITE,{0u,size}) +// interopBuffer.buffer->getBoundMemory()->mapMemoryRange(IDriverMemoryAllocation::EMCAF_READ_AND_WRITE,{0u,size}); } else interopBuffer.buffer = m_driver->createDeviceLocalGPUBufferOnDedMem(size); @@ -2136,7 +2136,17 @@ bool Renderer::traceBounce(uint32_t& raycount) std::cout << "[ERROR] RadeonRays Timed Out" << std::endl; return false; } + + if (static_cast(m_driver)->runningInRenderdoc()) + { + auto touchAllBytes = [](IGPUBuffer* buf)->void + { + auto ptr = reinterpret_cast(buf->getBoundMemory()->getMappedPointer()); + }; + touchAllBytes(m_intersectionBuffer[descSetIx].buffer.get()); + } } + // compute bounce (accumulate contributions and optionally generate rays) { diff --git a/22.RaytracedAO/raygen.comp b/22.RaytracedAO/raygen.comp index bb16337c2..60e6718d7 100644 --- a/22.RaytracedAO/raygen.comp +++ b/22.RaytracedAO/raygen.comp @@ -173,7 +173,7 @@ void main() { const vec3 prev = fetchAccumulation(coord); const vec3 delta = (contrib.color-prev)*pc.cummon.rcpFramesDispatched; - if 
(any(greaterThan(delta,vec3(exp2(-19.f))))) + if (any(greaterThan(abs(delta),vec3(exp2(-19.f))))) storeAccumulation(prev+delta,coord); } addAlbedo(contrib.albedo,coord,pc.cummon.rcpFramesDispatched); diff --git a/22.RaytracedAO/raytraceCommon.glsl b/22.RaytracedAO/raytraceCommon.glsl index b7c6e8b23..51b403abf 100644 --- a/22.RaytracedAO/raytraceCommon.glsl +++ b/22.RaytracedAO/raytraceCommon.glsl @@ -91,7 +91,7 @@ void storeAccumulation(in vec3 color, in uvec3 coord) } void addAccumulation(in vec3 delta, in uvec3 coord) { - if (any(greaterThan(delta,vec3(exp2(-19.f))))) + if (any(greaterThan(abs(delta),vec3(exp2(-19.f))))) { const vec3 prev = fetchAccumulation(coord); const vec3 newVal = prev+delta; @@ -117,7 +117,7 @@ void nextSampleAccumulationCascade(in vec3 weightedDelta, uvec3 coord, in uint s } void addAccumulationCascade(in vec3 weightedDelta, uvec3 coord, in uint samplesPerPixelPerDispatch, in uint cascadeIndex) { - if (any(greaterThan(weightedDelta,vec3(exp2(-19.f))))) + if (any(greaterThan(abs(weightedDelta),vec3(exp2(-19.f))))) { // but leave first index in the array for the ray accumulation metadata, hence the +1 coord.z += (cascadeIndex+1u)*samplesPerPixelPerDispatch; From 8d9a94e346ad22bbc9c7afbc2ad59aa14a14ecf5 Mon Sep 17 00:00:00 2001 From: Przemek Date: Tue, 15 Jul 2025 21:07:48 +0200 Subject: [PATCH 36/41] Implemented commit hash printing --- 22.RaytracedAO/main.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index c6ffbd84b..855a6ac63 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -272,7 +272,14 @@ int main(int argc, char** argv) for (auto i = 1ul; i < argc; ++i) arguments.emplace_back(argv[i]); } - + std::cout << std::endl; + std::cout << "-- Build URL:" << std::endl; + std::cout << NBL_BUILD_URL << std::endl; + std::cout << std::endl; + std::cout << "-- Build log:" << std::endl; + std::cout << NBL_GIT_LOG << std::endl; + std::cout << 
std::endl; + bool applicationIsReloaded = false; PersistentState applicationState; { From 99fc650b7505e2899fbc1f46ec3b3f4fde221d5f Mon Sep 17 00:00:00 2001 From: devsh Date: Thu, 17 Jul 2025 10:33:19 +0200 Subject: [PATCH 37/41] how on earth did RWMC even work before !? --- 22.RaytracedAO/Renderer.cpp | 2 ++ 22.RaytracedAO/closestHit.comp | 2 +- 22.RaytracedAO/main.cpp | 8 ++++++++ 22.RaytracedAO/raygen.comp | 5 +++-- 22.RaytracedAO/raytraceCommon.glsl | 10 ++++------ 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/22.RaytracedAO/Renderer.cpp b/22.RaytracedAO/Renderer.cpp index 2bbd99905..265f4986d 100644 --- a/22.RaytracedAO/Renderer.cpp +++ b/22.RaytracedAO/Renderer.cpp @@ -1254,6 +1254,8 @@ void Renderer::initScreenSizedResources( const bool baseIsKnown = cascadeLuminanceBase>std::numeric_limits::min(); if (core::isnan(cascadeLuminanceStart)) cascadeLuminanceStart = baseIsKnown ? (maxEmitterRadianceLuma*std::pow(cascadeLuminanceBase,-cascadeSegmentCount)):Emin; + // rationale, we don't have NEE and BRDF importance sampling samples with throughput <= 1.0 + // However we have RIS, and that can complicate this assumption a bit if (!baseIsKnown) cascadeLuminanceBase = core::max(std::pow(maxEmitterRadianceLuma/cascadeLuminanceStart,1.f/cascadeSegmentCount),1.0625f); std::cout << "Re-Weighting Monte Carlo = ENABLED [cascadeCount: "< Date: Thu, 17 Jul 2025 13:50:03 +0200 Subject: [PATCH 38/41] undo the RWMC forcing --- 22.RaytracedAO/main.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/22.RaytracedAO/main.cpp b/22.RaytracedAO/main.cpp index 68c4735a7..855a6ac63 100644 --- a/22.RaytracedAO/main.cpp +++ b/22.RaytracedAO/main.cpp @@ -643,19 +643,11 @@ int main(int argc, char** argv) mainSensorData.denoiserInfo.bloomIntensity = film.denoiserBloomIntensity; mainSensorData.denoiserInfo.tonemapperArgs = std::string(film.denoiserTonemapperArgs); mainSensorData.fileFormat = film.fileFormat; -#ifndef UBER_TEST - mainSensorData.cascadeCount = 6; - 
mainSensorData.cascadeLuminanceBase = film.cascadeLuminanceBase; - mainSensorData.cascadeLuminanceStart = film.cascadeLuminanceStart; - mainSensorData.kappa = 1.f; - mainSensorData.Emin = 1.7f; -#else mainSensorData.cascadeCount = film.cascadeCount; mainSensorData.cascadeLuminanceBase = film.cascadeLuminanceBase; mainSensorData.cascadeLuminanceStart = film.cascadeLuminanceStart; mainSensorData.kappa = mainSensorData.cascadeCount<2 ? 0.f:film.rfilter.kappa; mainSensorData.Emin = film.rfilter.Emin; -#endif mainSensorData.envmapRegFactor = core::clamp(film.envmapRegularizationFactor, 0.0f, 0.8f); mainSensorData.outputFilePath = std::filesystem::path(film.outputFilePath); // handle missing output path From 1fe969745304a3492b04c3d1902561d92f467827 Mon Sep 17 00:00:00 2001 From: Mateusz Kielan Date: Sat, 13 Sep 2025 21:02:26 +0200 Subject: [PATCH 39/41] Update readme just not to lose the branch --- 22.RaytracedAO/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/22.RaytracedAO/README.md b/22.RaytracedAO/README.md index 03bec2bc1..2b6947322 100644 --- a/22.RaytracedAO/README.md +++ b/22.RaytracedAO/README.md @@ -48,6 +48,7 @@ Example Usages : ## New mitsuba properties and tags + Multiple Sensor tags in mitsuba XML's is now supported. This feature helps you have multiple views with different camera and film parameters without needing to execute the renderer and load again. You can switch between those sensors using `PAGE UP/DOWN` Keys defined in more detail below. 
From b742c70c5abc6feaf22ebb56cbae8a2bbcfa12f9 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 28 Oct 2025 16:31:58 +0100 Subject: [PATCH 40/41] note some TODOs for Arek --- 50.IESProfileTest/compute/cdc.comp | 1 + 1 file changed, 1 insertion(+) diff --git a/50.IESProfileTest/compute/cdc.comp b/50.IESProfileTest/compute/cdc.comp index 390d63acb..27eeabf79 100644 --- a/50.IESProfileTest/compute/cdc.comp +++ b/50.IESProfileTest/compute/cdc.comp @@ -36,6 +36,7 @@ layout(push_constant) uniform PushConstants uint dummy; } pc; +// TODO: use the common builtin headers for all this! vec3 octahedronUVToDir(vec2 uv) { vec3 position = vec3((uv * 2.0 - 1.0).xy, 0.0); From cd3efce0ac9b498bd8569a68fb9ddef0f6f19332 Mon Sep 17 00:00:00 2001 From: devsh Date: Tue, 28 Oct 2025 17:21:09 +0100 Subject: [PATCH 41/41] @AnastaZIuk I just fix the signature not the visualization (has no clue about corner sampling the IES) --- 50.IESProfileTest/shader.frag | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/50.IESProfileTest/shader.frag b/50.IESProfileTest/shader.frag index ec7a00b8f..0f1c2883b 100644 --- a/50.IESProfileTest/shader.frag +++ b/50.IESProfileTest/shader.frag @@ -30,8 +30,11 @@ float plot(float cand, float pct, float bold){ } // vertical cut of IES (i.e. cut by plane x = 0) -float f(vec2 uv) { - return texture(inIESCandelaImage,nbl_glsl_IES_convert_dir_to_uv(normalize(vec3(uv.x, 0.001, uv.y)))).x; +float f(vec2 uv) +{ + const vec3 dir = normalize(vec3(uv.x, 0.001, uv.y)); + const vec2 octUV = nbl_glsl_IES_convert_dir_to_uv(dir,vec2(0.5)-vec2(0.5)/textureSize(inIESCandelaImage,0).xy); + return textureLod(inIESCandelaImage,octUV,0).x; // float vangle = (abs(atan(uv.x,uv.y)))/(M_PI); // float hangle = uv.x <= 0.0 ? 0.0 : 1.0; // return texture(inIESCandelaImage,vec2(hangle,vangle)).x;