EEVEE-Next: Port LUT generation code

This ports the LUT generation to a compute shader.
All LUTs are computed by the same compute shader
to avoid boilerplate code when adding new LUTs.

As for the generation code itself, it is mostly the
same except for the use of `hammersley_2d` instead of
regular grid sampling. Regular grid sampling did not improve
anything and was a bit more cumbersome.

This also bumps the number of samples very high
for more precision.

The new utility class for computing the LUT has
methods to write the content to a PFM image file
or as a C++ array header.
This commit is contained in:
Clément Foucault
2023-09-03 13:33:38 +02:00
parent 7d2c854261
commit 7b54fce723
11 changed files with 409 additions and 17 deletions

View File

@@ -150,6 +150,7 @@ set(SRC
engines/eevee_next/eevee_lightcache.cc
engines/eevee_next/eevee_lightprobe.cc
engines/eevee_next/eevee_lookdev.cc
engines/eevee_next/eevee_lut.cc
engines/eevee_next/eevee_material.cc
engines/eevee_next/eevee_motion_blur.cc
engines/eevee_next/eevee_pipeline.cc
@@ -298,6 +299,7 @@ set(SRC
engines/eevee_next/eevee_lightcache.hh
engines/eevee_next/eevee_lightprobe.hh
engines/eevee_next/eevee_lookdev.hh
engines/eevee_next/eevee_lut.hh
engines/eevee_next/eevee_material.hh
engines/eevee_next/eevee_motion_blur.hh
engines/eevee_next/eevee_pipeline.hh
@@ -525,6 +527,7 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_lightprobe_irradiance_load_comp.glsl
engines/eevee_next/shaders/eevee_lightprobe_lib.glsl
engines/eevee_next/shaders/eevee_ltc_lib.glsl
engines/eevee_next/shaders/eevee_lut_comp.glsl
engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl

View File

@@ -13,6 +13,9 @@
# pragma once
#endif
/* Look Up Tables.
 * Local group size, along X and Y, of the LUT generation compute shader. */
#define LUT_WORKGROUP_SIZE 16
/* Hierarchical Z down-sampling. */
#define HIZ_MIP_COUNT 8
/* NOTE: The shader is written to update 5 mipmaps using LDS. */

View File

@@ -0,0 +1,45 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup eevee
*
* LUT generation module.
*/
#include "eevee_lut.hh"
namespace blender::eevee {
/**
 * Generate the table of type `table_type` with a compute dispatch submitted through `manager`,
 * then read the result back into `raw_data_`.
 */
LookUpTable::LookUpTable(draw::Manager &manager, LookUpTableType table_type, int3 table_extent)
    : table_extent_(table_extent)
{
  /* The texture is written by the shader and then read back on the host. */
  eGPUTextureUsage tx_usage = GPU_TEXTURE_USAGE_SHADER_WRITE | GPU_TEXTURE_USAGE_HOST_READ;
  Texture lut_tx = {"LUT Precompute"};
  lut_tx.ensure_3d(GPU_RGBA32F, table_extent, tx_usage);

  GPUShader *lut_sh = GPU_shader_create_from_info_name("eevee_lut");

  /* One invocation per texel: round the group count up so the whole extent is covered. */
  int3 group_size = int3(int2(LUT_WORKGROUP_SIZE), 1);
  int3 group_count = math::divide_ceil(table_extent, group_size);

  PassSimple pass = {"LUT Precompute"};
  pass.shader_set(lut_sh);
  pass.push_constant("table_type", int(table_type));
  pass.push_constant("table_extent", table_extent);
  pass.bind_image("table_img", lut_tx);
  pass.dispatch(group_count);
  /* Barrier recorded after the dispatch, ahead of the host read-back below. */
  pass.barrier(GPU_BARRIER_TEXTURE_UPDATE);

  manager.submit(pass);

  /* The returned buffer is released by the destructor. */
  raw_data_ = lut_tx.read<float4>(GPU_DATA_FLOAT);

  GPU_shader_free(lut_sh);
}
/* Release the pixel buffer obtained from the texture read-back in the constructor. */
LookUpTable::~LookUpTable()
{
MEM_SAFE_FREE(raw_data_);
}
} // namespace blender::eevee

View File

@@ -0,0 +1,170 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup eevee
*
* LUT generation module.
*/
#pragma once
#include "BLI_math_vector_types.hh"
#include "eevee_shader_shared.hh"
#include <fstream>
namespace blender::eevee {
/**
 * Create a look-up table of the specified type using GPU compute.
 * Not to be used at runtime in final release.
 * Usage example: `LookUpTable(manager, LUT_GGX_BRDF_SPLIT_SUM, {64, 64, 1}).data<float2>()`
 *
 * The object owns the pixel buffer it holds and frees it on destruction,
 * which is why it can neither be copied nor assigned.
 */
class LookUpTable {
 private:
  /** Number of texels along each axis of the table. */
  int3 table_extent_;
  /** Table content, one `float4` per texel. Owned by this object. */
  float4 *raw_data_ = nullptr;

 public:
  LookUpTable(draw::Manager &manager, LookUpTableType table_type, int3 table_extent);
  ~LookUpTable();

  /* `raw_data_` is freed by the destructor: a member-wise copy would free it twice. */
  LookUpTable(const LookUpTable &) = delete;
  LookUpTable &operator=(const LookUpTable &) = delete;

  /**
   * Cast each pixel data to type `T`.
   * \return a new vector with one element per texel of the table.
   */
  template<typename T> Vector<T> data() const
  {
    /* Widen before multiplying so that large extents cannot overflow `int`. */
    const int64_t table_len = int64_t(table_extent_.x) * int64_t(table_extent_.y) *
                              int64_t(table_extent_.z);
    Vector<T> out_data(table_len);
    for (const int64_t i : IndexRange(table_len)) {
      out_data[i] = T(raw_data_[i]);
    }
    return out_data;
  }

  /**
   * Write the content of a texture to a PFM image file for inspection.
   * OpenGL texture coordinate convention with Y up is respected.
   *
   * The image is `n_x * n_z` pixels wide and `n_y * n_w` pixels high: Z slices are laid out
   * side by side horizontally and W slices are stacked vertically.
   *
   * \param name: Output path without extension, `.pfm` is appended.
   * \param pixels: Source texels, X being the fastest varying dimension, then Y, Z and W.
   * \param n_x, n_y, n_z, n_w: Number of texels along each dimension.
   */
  template<typename VecT>
  static void write_to_pfm(StringRefNull name,
                           Span<VecT> pixels,
                           int64_t n_x,
                           int64_t n_y = 1,
                           int64_t n_z = 1,
                           int64_t n_w = 1)
  {
    BLI_STATIC_ASSERT(VecT::type_length < 4, "4 component PFM are not possible");

    /* Open the file once, in binary mode. A text mode stream would translate the `\n` of the
     * header into `\r\n` on Windows, whereas PFM header lines are terminated by a single line
     * feed. */
    std::ofstream file;
    file.open(std::string(name) + ".pfm", std::ios::out | std::ios::trunc | std::ios::binary);

    /* Write PFM header. */
    file << "PF\n";
    file << n_x * n_z << " " << n_y * n_w << "\n";
    /* The sign of the scale factor encodes the byte order of the float data. */
#ifdef __LITTLE_ENDIAN__
    file << "-1.0\n";
#else
    file << "1.0\n";
#endif

    /* Write binary float content. */
    /* Iterate over destination pixels. */
    for (const int64_t y : IndexRange(n_y * n_w)) {
      for (const int64_t x : IndexRange(n_x * n_z)) {
        /* Find the source texel from the position inside the tiled image. */
        const int64_t src_w = y / n_y;
        const int64_t src_z = x / n_x;
        const int64_t src_y = y % n_y;
        const int64_t src_x = x % n_x;
        const int64_t src = (n_x * n_y * n_z * src_w) + (n_x * n_y * src_z) + (n_x * src_y) +
                            src_x;
        /* Always write 3 components, the missing ones are left at zero. */
        float3 data(0.0f);
        for (auto c : IndexRange(VecT::type_length)) {
          data[c] = pixels[src][c];
        }
        file.write(reinterpret_cast<char *>(&data), sizeof(float3));
      }
    }
    file.close();
  }

  /**
   * Write the content of a texture as a C++ header file array.
   * The content is to be copied to `eevee_lut.cc` and formatted with `make format`.
   *
   * Only the dimensions larger than 1 are emitted as array dimensions of the declaration.
   *
   * \param name: Output path without extension, `.hh` is appended. Also used as suffix of the
   * array name (`table_<name>`).
   * \param pixels: Source texels, X being the fastest varying dimension, then Y, Z and W.
   * \param n_x, n_y, n_z, n_w: Number of texels along each dimension.
   */
  template<typename VecT>
  static void write_to_header(StringRefNull name,
                              Span<VecT> pixels,
                              int64_t n_x,
                              int64_t n_y = 1,
                              int64_t n_z = 1,
                              int64_t n_w = 1)
  {
    std::ofstream file;
    file.open(std::string(name) + ".hh");

    /* Array declaration, outermost dimension first. */
    file << "static const VecBase<float, " << VecT::type_length << "> table_" << name;
    if (n_w > 1) {
      file << "[" << n_w << "]";
    }
    if (n_z > 1) {
      file << "[" << n_z << "]";
    }
    if (n_y > 1) {
      file << "[" << n_y << "]";
    }
    if (n_x > 1) {
      file << "[" << n_x << "]";
    }
    file << " = {\n";

    /* A nesting level needs its own braces as soon as itself or any outer level is an array. */
    const bool brace_w = n_w > 1;
    const bool brace_z = brace_w || n_z > 1;
    const bool brace_y = brace_z || n_y > 1;
    const bool brace_x = brace_y || n_x > 1;

    /* Print data formatted as C++ array. */
    for (const int64_t w : IndexRange(n_w)) {
      if (brace_w) {
        file << "{\n";
      }
      for (const int64_t z : IndexRange(n_z)) {
        if (brace_z) {
          file << "{\n";
        }
        for (const int64_t y : IndexRange(n_y)) {
          if (brace_y) {
            file << "{\n";
          }
          for (const int64_t x : IndexRange(n_x)) {
            if (brace_x) {
              file << "{";
            }
            const int64_t pixel_index = (n_x * n_y * n_z * w) + (n_x * n_y * z) + (n_x * y) + x;
            for (auto c : IndexRange(VecT::type_length)) {
              /* NOTE: `std::to_string` prints floats in fixed notation with 6 decimals. */
              file << std::to_string(pixels[pixel_index][c]);
              if (c + 1 < VecT::type_length) {
                file << "f, ";
              }
              else {
                file << "f";
              }
            }
            if (brace_x) {
              file << (x + 1 < n_x ? "}, " : "}");
            }
          }
          if (brace_y) {
            file << (y + 1 < n_y ? "},\n" : "}\n");
          }
        }
        if (brace_z) {
          file << (z + 1 < n_z ? "},\n" : "}\n");
        }
      }
      if (brace_w) {
        file << (w + 1 < n_w ? "},\n" : "}\n");
      }
    }
    file << "};\n";
    file.close();
  }
};
} // namespace blender::eevee

View File

@@ -82,6 +82,17 @@ enum eDebugMode : uint32_t {
/** \} */
/* -------------------------------------------------------------------- */
/** \name Look-Up Table Generation
* \{ */
/* Selects which table the LUT generation compute shader computes (`table_type` push constant). */
enum LookUpTableType : uint32_t {
/* Table computed by `ggx_brdf_split_sum()`. */
LUT_GGX_BRDF_SPLIT_SUM = 0u,
/* Table computed by `ggx_btdf_split_sum()`. */
LUT_GGX_BTDF_SPLIT_SUM = 1u,
};
/** \} */
/* -------------------------------------------------------------------- */
/** \name Sampling
* \{ */

View File

@@ -106,3 +106,26 @@ float bsdf_lambert(vec3 N, vec3 L)
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Utils
* \{ */
/* Fresnel monochromatic, perfect mirror.
 * Only the absolute value of `cos_theta` is used.
 * Evaluates to 1.0 in the total internal reflection case. */
float F_eta(float eta, float cos_theta)
{
  float c = abs(cos_theta);
  /* Working with this term gives the reflectance without explicitly computing
   * the refracted direction. */
  float g_squared = eta * eta - 1.0 + c * c;
  if (!(g_squared > 0.0)) {
    /* Total internal reflections. */
    return 1.0;
  }
  float g = sqrt(g_squared);
  float g_minus_c = g - c;
  float g_plus_c = g + c;
  float A = g_minus_c / g_plus_c;
  float B = (c * g_plus_c - 1.0) / (c * g_minus_c + 1.0);
  return 0.5 * A * A * (1.0 + B * B);
}
/** \} */

View File

@@ -79,6 +79,12 @@ vec3 sample_ggx(vec3 rand, float alpha, vec3 Vt, out float G1)
#endif
}
/* Overload of `sample_ggx` for callers that have no use for the `G1` output. */
vec3 sample_ggx(vec3 rand, float alpha, vec3 Vt)
{
  float G1_discarded;
  vec3 result = sample_ggx(rand, alpha, Vt, G1_discarded);
  return result;
}
/**
* Returns a reflected ray direction following the GGX distribution.
*

View File

@@ -0,0 +1,133 @@
/* SPDX-FileCopyrightText: 2017-2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
* Used to generate Look Up Tables. Not used in default configuration as the tables are stored in
* the blender executable. This is only used for reference or to update them.
*/
#pragma BLENDER_REQUIRE(gpu_shader_math_base_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_bxdf_sampling_lib.glsl)
/* Generate BRDF LUT following "Real shading in unreal engine 4" by Brian Karis
 * https://cdn2.unrealengine.com/Resources/files/2013SiggraphPresentationsNotes-26915738.pdf
 * Parametrizing with `x = roughness` and `y = sqrt(1.0 - cos(theta))`.
 * The result is interpreted as: `integral = f0 * scale + f90 * bias`.
 * `scale` is returned in the first component and `bias` in the second one. */
vec4 ggx_brdf_split_sum(vec3 lut_coord)
{
  /* Squaring for perceptually linear roughness, see [Physically Based Shading at Disney]
   * (https://media.disneyanimation.com/uploads/production/publication_asset/48/asset/s2012_pbs_disney_brdf_notes_v3.pdf)
   * Section 5.4. */
  float alpha = square(lut_coord.x);
  float alpha_sq = square(alpha);

  /* View vector in the XZ plane, the normal being the Z axis. */
  float cos_view = clamp(1.0 - square(lut_coord.y), 1e-4, 0.9999);
  vec3 view_dir = vec3(sqrt(1.0 - square(cos_view)), 0.0, cos_view);

  /* Integrating BRDF. */
  const uint sample_count = 512u * 512u;
  float scale_sum = 0.0;
  float bias_sum = 0.0;
  for (uint sample_id = 0u; sample_id < sample_count; sample_id++) {
    vec3 Xi = sample_cylinder(hammersley_2d(sample_id, sample_count));

    /* Microfacet normal. */
    vec3 half_vec = sample_ggx(Xi, alpha, view_dir);
    vec3 light_dir = -reflect(view_dir, half_vec);
    float cos_light = light_dir.z;

    /* Only the samples reflected above the surface contribute. */
    if (!(cos_light > 0.0)) {
      continue;
    }
    /* Assuming sample visible normals, `weight = brdf * NV / (pdf * fresnel).` */
    float weight = bxdf_ggx_smith_G1(cos_light, alpha_sq);
    /* Schlick's Fresnel. */
    float schlick = saturate(pow5f(1.0 - saturate(dot(view_dir, half_vec))));
    scale_sum += (1.0 - schlick) * weight;
    bias_sum += schlick * weight;
  }
  scale_sum /= float(sample_count);
  bias_sum /= float(sample_count);

  return vec4(scale_sum, bias_sum, 0.0, 0.0);
}
/* Generate BSDF LUT for `IOR < 1`. Returns the transmittance and the reflectance.
 * Parametrizing with `x = ior^2`, `y` the view angle cosine remapped around the critical angle
 * and `z = sqrt(roughness)`. */
vec4 ggx_btdf_split_sum(vec3 lut_coord)
{
  float ior = sqrt(lut_coord.x);
  /* ior is sin of critical angle. */
  float critical_cos = sqrt(1.0 - saturate(square(ior)));

  /* Remap `y` to the [-1..1] range. */
  float y_signed = lut_coord.y * 2.0 - 1.0;
  /* Maximize texture usage on both sides of the critical angle. */
  float y_span = (y_signed > 0.0) ? (1.0 - critical_cos) : critical_cos;
  /* Center LUT around critical angle to avoid strange interpolation issues when the critical
   * angle is changing. */
  float cos_view = clamp(y_signed * y_span + critical_cos, 1e-4, 0.9999);

  /* Squaring for perceptually linear roughness, see [Physically Based Shading at Disney]
   * (https://media.disneyanimation.com/uploads/production/publication_asset/48/asset/s2012_pbs_disney_brdf_notes_v3.pdf)
   * Section 5.4. */
  float alpha = square(lut_coord.z);
  float alpha_sq = square(alpha);

  /* View vector in the XZ plane, the normal being the Z axis. */
  vec3 view_dir = vec3(sqrt(1.0 - square(cos_view)), 0.0, cos_view);

  /* Integrating BSDF */
  const uint sample_count = 512u * 512u;
  float transmit_sum = 0.0;
  float reflect_sum = 0.0;
  for (uint sample_id = 0u; sample_id < sample_count; sample_id++) {
    vec3 Xi = sample_cylinder(hammersley_2d(sample_id, sample_count));

    /* Microfacet normal. */
    vec3 half_vec = sample_ggx(Xi, alpha, view_dir);
    float fresnel = F_eta(ior, dot(view_dir, half_vec));

    /* Reflection. */
    vec3 reflect_dir = -reflect(view_dir, half_vec);
    float cos_reflect = reflect_dir.z;
    if (cos_reflect > 0.0) {
      /* Assuming sample visible normals, accumulating `brdf * NV / pdf.` */
      reflect_sum += fresnel * bxdf_ggx_smith_G1(cos_reflect, alpha_sq);
    }

    /* Refraction. */
    vec3 refract_dir = refract(-view_dir, half_vec, ior);
    float cos_refract = refract_dir.z;
    /* In the case of TIR, `refract_dir == vec3(0)`. */
    if (cos_refract < 0.0) {
      /* Assuming sample visible normals, accumulating `btdf * NV / pdf.` */
      transmit_sum += (1.0 - fresnel) * bxdf_ggx_smith_G1(cos_refract, alpha_sq);
    }
  }
  transmit_sum /= float(sample_count);
  reflect_sum /= float(sample_count);

  /* The two remaining components leave room for a multi-scatter result (which is a little bit
   * different still) and / or lobe fitting data for better sampling. */
  return vec4(transmit_sum, reflect_sum, 0.0, 0.0);
}
/* Compute one texel of the table selected by `table_type` and store it into `table_img`. */
void main()
{
  ivec3 texel = ivec3(gl_GlobalInvocationID);
  /* The dispatch covers whole work-groups: skip the invocations that fall outside of the table
   * instead of integrating a result that cannot be stored. */
  if (any(greaterThanEqual(texel, table_extent))) {
    return;
  }

  /* Make sure coordinates are covering the whole [0..1] range at texel center.
   * Clamp the divisor so that an axis with a single texel maps to 0 instead of dividing by
   * zero. */
  vec3 lut_normalized_coordinate = vec3(texel) / vec3(max(table_extent - 1, ivec3(1)));

  /* Make sure missing cases are noticeable. */
  vec4 result = vec4(-1);
  switch (uint(table_type)) {
    case LUT_GGX_BRDF_SPLIT_SUM:
      result = ggx_brdf_split_sum(lut_normalized_coordinate);
      break;
    case LUT_GGX_BTDF_SPLIT_SUM:
      result = ggx_btdf_split_sum(lut_normalized_coordinate);
      break;
  }
  imageStore(table_img, texel, result);
}

View File

@@ -256,23 +256,6 @@ float nodetree_thickness();
vec4 closure_to_rgba(Closure cl);
#endif
/* Fresnel monochromatic, perfect mirror.
 * Only the absolute value of `cos_theta` is used.
 * Evaluates to 1.0 in the total internal reflection case. */
float F_eta(float eta, float cos_theta)
{
/* Compute fresnel reflectance without explicitly computing
* the refracted direction. */
float c = abs(cos_theta);
float g = eta * eta - 1.0 + c * c;
/* A non-positive `g` is handled as total internal reflection below. */
if (g > 0.0) {
g = sqrt(g);
float A = (g - c) / (g + c);
float B = (c * (g + c) - 1.0) / (c * (g - c) + 1.0);
return 0.5 * A * A * (1.0 + B * B);
}
/* Total internal reflections. */
return 1.0;
}
/* Simplified form of F_eta(eta, 1.0). */
float F0_from_ior(float eta)
{

View File

@@ -0,0 +1,14 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "eevee_defines.hh"
/* Compute shader generating look-up tables.
 * `table_type` selects the table to compute and `table_extent` is its size in texels.
 * The result is stored into the `table_img` 3D image. */
GPU_SHADER_CREATE_INFO(eevee_lut)
.local_group_size(LUT_WORKGROUP_SIZE, LUT_WORKGROUP_SIZE, 1)
.push_constant(Type::INT, "table_type")
.push_constant(Type::IVEC3, "table_extent")
.image(0, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_3D, "table_img")
.additional_info("eevee_shared")
.compute_source("eevee_lut_comp.glsl")
.do_static_compilation(true);

View File

@@ -652,6 +652,7 @@ set(SRC_SHADER_CREATE_INFOS
../draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_irradiance_cache_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_lut_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh
../draw/engines/eevee_next/shaders/infos/eevee_reflection_probe_info.hh