EEVEE-Next: Port LUT generation code

This ports the LUT generation to a compute shader. All LUTs are computed by the same compute shader to avoid the boilerplate code otherwise needed to add new LUTs. The generation code itself is mostly the same, except for the use of `hammersley_2d` instead of regular grid sampling: the regular grid did not improve anything and was a bit more cumbersome. This also bumps the number of samples very high for more precision. The new utility class for computing the LUT has methods to write the content to a PFM image file or as a C++ array header.
2023-09-03 13:33:38 +02:00
parent 7d2c854261
commit 7b54fce723
11 changed files with 409 additions and 17 deletions
--- a/source/blender/draw/CMakeLists.txt
+++ b/source/blender/draw/CMakeLists.txt
@@ -150,6 +150,7 @@ set(SRC
  engines/eevee_next/eevee_lightcache.cc
  engines/eevee_next/eevee_lightprobe.cc
  engines/eevee_next/eevee_lookdev.cc
+  engines/eevee_next/eevee_lut.cc
  engines/eevee_next/eevee_material.cc
  engines/eevee_next/eevee_motion_blur.cc
  engines/eevee_next/eevee_pipeline.cc
@@ -298,6 +299,7 @@ set(SRC
  engines/eevee_next/eevee_lightcache.hh
  engines/eevee_next/eevee_lightprobe.hh
  engines/eevee_next/eevee_lookdev.hh
+  engines/eevee_next/eevee_lut.hh
  engines/eevee_next/eevee_material.hh
  engines/eevee_next/eevee_motion_blur.hh
  engines/eevee_next/eevee_pipeline.hh
@@ -525,6 +527,7 @@ set(GLSL_SRC
  engines/eevee_next/shaders/eevee_lightprobe_irradiance_load_comp.glsl
  engines/eevee_next/shaders/eevee_lightprobe_lib.glsl
  engines/eevee_next/shaders/eevee_ltc_lib.glsl
+  engines/eevee_next/shaders/eevee_lut_comp.glsl
  engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl
  engines/eevee_next/shaders/eevee_motion_blur_flatten_comp.glsl
  engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl
--- a/source/blender/draw/engines/eevee_next/eevee_defines.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_defines.hh
@@ -13,6 +13,9 @@
 #  pragma once
 #endif

+/* Look Up Tables. Thread-group size of the LUT compute shader along X and Y (Z uses 1). */
+#define LUT_WORKGROUP_SIZE 16
+
 /* Hierarchical Z down-sampling. */
 #define HIZ_MIP_COUNT 8
 /* NOTE: The shader is written to update 5 mipmaps using LDS. */
--- a/source/blender/draw/engines/eevee_next/eevee_lut.cc
+++ b/source/blender/draw/engines/eevee_next/eevee_lut.cc
@@ -0,0 +1,45 @@
+/* SPDX-FileCopyrightText: 2023 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup eevee
+ *
+ * LUT generation module.
+ */
+
+#include "eevee_lut.hh"
+
+namespace blender::eevee {
+
+LookUpTable::LookUpTable(draw::Manager &manager, LookUpTableType table_type, int3 table_extent)
+    : table_extent_(table_extent)
+{
+  /* The table is written by the compute shader and then read back on the host. */
+  const eGPUTextureUsage lut_usage = GPU_TEXTURE_USAGE_SHADER_WRITE | GPU_TEXTURE_USAGE_HOST_READ;
+  Texture lut_tx = {"LUT Precompute"};
+  lut_tx.ensure_3d(GPU_RGBA32F, table_extent, lut_usage);
+
+  GPUShader *lut_sh = GPU_shader_create_from_info_name("eevee_lut");
+
+  /* Round the group count up so that every texel of the table is covered. */
+  const int3 group_size = int3(int2(LUT_WORKGROUP_SIZE), 1);
+  const int3 group_count = math::divide_ceil(table_extent, group_size);
+
+  PassSimple precompute_ps = {"LUT Precompute"};
+  precompute_ps.shader_set(lut_sh);
+  precompute_ps.push_constant("table_type", int(table_type));
+  precompute_ps.push_constant("table_extent", table_extent);
+  precompute_ps.bind_image("table_img", lut_tx);
+  precompute_ps.dispatch(group_count);
+  precompute_ps.barrier(GPU_BARRIER_TEXTURE_UPDATE);
+
+  manager.submit(precompute_ps);
+
+  /* Keep a host copy of the result; it is released by the destructor. */
+  raw_data_ = lut_tx.read<float4>(GPU_DATA_FLOAT);
+
+  GPU_shader_free(lut_sh);
+}
+
+LookUpTable::~LookUpTable()
+{
+  MEM_SAFE_FREE(raw_data_); /* Buffer assigned from the texture read-back in the constructor. */
+}
+
+}  // namespace blender::eevee
--- a/source/blender/draw/engines/eevee_next/eevee_lut.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_lut.hh
@@ -0,0 +1,170 @@
+/* SPDX-FileCopyrightText: 2023 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup eevee
+ *
+ * LUT generation module.
+ */
+
+#pragma once
+
+#include "BLI_math_vector_types.hh"
+
+#include "eevee_shader_shared.hh"
+
+#include <fstream>
+
+namespace blender::eevee {
+
+/**
+ * Create a look-up table of the specified type using GPU compute.
+ * Not to be used at runtime in final release.
+ * Usage example: `LookUpTable(manager, LUT_GGX_BRDF_SPLIT_SUM, {64, 64, 1}).data<float2>()`
+ *
+ * The result is kept as one `float4` per texel in a buffer owned by the instance and released
+ * by the destructor.
+ */
+class LookUpTable {
+ private:
+  int3 table_extent_;
+  float4 *raw_data_ = nullptr;
+
+ public:
+  LookUpTable(draw::Manager &manager, LookUpTableType table_type, int3 table_extent);
+  ~LookUpTable();
+
+  /* `raw_data_` is freed by the destructor: a copy would free the same buffer twice. */
+  LookUpTable(const LookUpTable &) = delete;
+  LookUpTable &operator=(const LookUpTable &) = delete;
+
+  /* Cast each pixel data to type `T`. Returns one element per texel of the table. */
+  template<typename T> Vector<T> data() const
+  {
+    /* Widen before multiplying so that the texel count cannot overflow `int`. */
+    const int64_t table_len = int64_t(table_extent_.x) * int64_t(table_extent_.y) *
+                              int64_t(table_extent_.z);
+    Vector<T> out_data(table_len);
+    for (const int64_t i : IndexRange(table_len)) {
+      out_data[i] = T(raw_data_[i]);
+    }
+    return out_data;
+  }
+
+  /**
+   * Write the content of a texture to a PFM image file for inspection.
+   * OpenGL texture coordinate convention with Y up is respected.
+   *
+   * The file is named `<name>.pfm`. The image is `n_x * n_z` pixels wide and `n_y * n_w` pixels
+   * high: Z slices are laid out side by side along X and W slices are stacked along Y.
+   * Components missing from `VecT` are written as zero.
+   */
+  template<typename VecT>
+  static void write_to_pfm(StringRefNull name,
+                           Span<VecT> pixels,
+                           int64_t n_x,
+                           int64_t n_y = 1,
+                           int64_t n_z = 1,
+                           int64_t n_w = 1)
+  {
+    BLI_STATIC_ASSERT(VecT::type_length < 4, "4 component PFM are not possible");
+
+    /* Open once, in binary mode: a text-mode stream may translate the header line feeds
+     * (e.g. to CR-LF), which shifts the start of the binary data for PFM readers. */
+    std::ofstream file;
+    file.open(std::string(name) + ".pfm", std::ios::out | std::ios::binary);
+
+    /* Write PFM header. */
+    file << "PF\n";
+    file << n_x * n_z << " " << n_y * n_w << "\n";
+    /* The sign of the scale factor encodes the byte order of the float data. */
+#ifdef __LITTLE_ENDIAN__
+    file << "-1.0\n";
+#else
+    file << "1.0\n";
+#endif
+
+    /* Write binary float content. */
+    /* Iterate over destination pixels. */
+    for (int64_t y : IndexRange(n_y * n_w)) {
+      for (int64_t x : IndexRange(n_x * n_z)) {
+        int64_t src_w = y / n_y;
+        int64_t src_z = x / n_x;
+        int64_t src_y = y % n_y;
+        int64_t src_x = x % n_x;
+        int64_t src = (n_x * n_y * n_z * src_w) + (n_x * n_y * src_z) + (n_x * src_y) + src_x;
+        float3 data(0.0f);
+        for (auto c : IndexRange(VecT::type_length)) {
+          data[c] = pixels[src][c];
+        }
+        file.write(reinterpret_cast<char *>(&data), sizeof(float3));
+      }
+    }
+    file.close();
+  }
+
+  /**
+   * Write the content of a texture as a C++ header file array.
+   * The content is to be copied to `eevee_lut.cc` and formatted with `make format`.
+   *
+   * The file is named `<name>.hh` and declares `table_<name>`. Dimensions of size 1 are omitted
+   * from the array declaration.
+   * NOTE: Values are printed with `std::to_string`, i.e. with a fixed number of decimals.
+   */
+  template<typename VecT>
+  static void write_to_header(StringRefNull name,
+                              Span<VecT> pixels,
+                              int64_t n_x,
+                              int64_t n_y = 1,
+                              int64_t n_z = 1,
+                              int64_t n_w = 1)
+  {
+    /* A level is braced when it, or any outer dimension, has more than one item. */
+    const bool brace_w = n_w > 1;
+    const bool brace_z = brace_w || n_z > 1;
+    const bool brace_y = brace_z || n_y > 1;
+    const bool brace_x = brace_y || n_x > 1;
+
+    std::ofstream file;
+
+    file.open(std::string(name) + ".hh");
+    file << "static const VecBase<float, " << VecT::type_length << "> table_" << name;
+    if (n_w > 1) {
+      file << "[" << n_w << "]";
+    }
+    if (n_z > 1) {
+      file << "[" << n_z << "]";
+    }
+    if (n_y > 1) {
+      file << "[" << n_y << "]";
+    }
+    if (n_x > 1) {
+      file << "[" << n_x << "]";
+    }
+    file << " = {\n";
+    /* Print data formatted as C++ array. */
+    for (auto w : IndexRange(n_w)) {
+      if (brace_w) {
+        file << "{\n";
+      }
+      for (auto z : IndexRange(n_z)) {
+        if (brace_z) {
+          file << "{\n";
+        }
+        for (auto y : IndexRange(n_y)) {
+          if (brace_y) {
+            file << "{\n";
+          }
+          for (auto x : IndexRange(n_x)) {
+            if (brace_x) {
+              file << "{";
+            }
+            int64_t pixel_index = (n_x * n_y * n_z * w) + (n_x * n_y * z) + (n_x * y) + x;
+            for (auto c : IndexRange(VecT::type_length)) {
+              /* Float literal suffix, followed by a separator except after the last one. */
+              file << std::to_string(pixels[pixel_index][c]) << "f";
+              if (c + 1 < VecT::type_length) {
+                file << ", ";
+              }
+            }
+            if (brace_x) {
+              file << (x + 1 < n_x ? "}, " : "}");
+            }
+          }
+          if (brace_y) {
+            file << (y + 1 < n_y ? "},\n" : "}\n");
+          }
+        }
+        if (brace_z) {
+          file << (z + 1 < n_z ? "},\n" : "}\n");
+        }
+      }
+      if (brace_w) {
+        file << (w + 1 < n_w ? "},\n" : "}\n");
+      }
+    }
+    file << "};\n";
+    file.close();
+  }
+};
+
+}  // namespace blender::eevee
--- a/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
+++ b/source/blender/draw/engines/eevee_next/eevee_shader_shared.hh
@@ -82,6 +82,17 @@ enum eDebugMode : uint32_t {

 /** \} */

+/* -------------------------------------------------------------------- */
+/** \name Look-Up Table Generation
+ *
+ * Identifies which table the LUT generation compute shader writes. The value is passed to the
+ * shader through the `table_type` push constant.
+ * \{ */
+
+enum LookUpTableType : uint32_t {
+  LUT_GGX_BRDF_SPLIT_SUM = 0u,
+  LUT_GGX_BTDF_SPLIT_SUM = 1u,
+};
+
+/** \} */
+
 /* -------------------------------------------------------------------- */
 /** \name Sampling
 * \{ */
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_lib.glsl
@@ -106,3 +106,26 @@ float bsdf_lambert(vec3 N, vec3 L)
 }

 /** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Utils
+ * \{ */
+
+/* Monochromatic Fresnel reflectance of a perfect mirror interface. */
+float F_eta(float eta, float cos_theta)
+{
+  /* The reflectance is evaluated without explicitly computing the refracted direction. */
+  float cos_abs = abs(cos_theta);
+  float g_squared = eta * eta - 1.0 + cos_abs * cos_abs;
+  if (!(g_squared > 0.0)) {
+    /* Total internal reflections. */
+    return 1.0;
+  }
+  float g = sqrt(g_squared);
+  float a = (g - cos_abs) / (g + cos_abs);
+  float b = (cos_abs * (g + cos_abs) - 1.0) / (cos_abs * (g - cos_abs) + 1.0);
+  return 0.5 * a * a * (1.0 + b * b);
+}
+
+/** \} */
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_sampling_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_bxdf_sampling_lib.glsl
@@ -79,6 +79,12 @@ vec3 sample_ggx(vec3 rand, float alpha, vec3 Vt, out float G1)
 #endif
 }

+/* Convenience overload of `sample_ggx` for callers that do not need the `G1` output. */
+vec3 sample_ggx(vec3 rand, float alpha, vec3 Vt)
+{
+  float G1_discarded;
+  vec3 sampled = sample_ggx(rand, alpha, Vt, G1_discarded);
+  return sampled;
+}
+
 /**
 * Returns a reflected ray direction following the GGX distribution.
 *
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_lut_comp.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_lut_comp.glsl
@@ -0,0 +1,133 @@
+/* SPDX-FileCopyrightText: 2017-2023 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/**
+ * Used to generate Look Up Tables. Not used in default configuration as the tables are stored in
+ * the blender executable. This is only used for reference or to update them.
+ */
+
+#pragma BLENDER_REQUIRE(gpu_shader_math_base_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
+#pragma BLENDER_REQUIRE(eevee_bxdf_sampling_lib.glsl)
+
+/* Generate BRDF LUT following "Real shading in unreal engine 4" by Brian Karis
+ * https://cdn2.unrealengine.com/Resources/files/2013SiggraphPresentationsNotes-26915738.pdf
+ * Parametrizing with `x = roughness` and `y = sqrt(1.0 - cos(theta))`.
+ * The result is interpreted as: `integral = f0 * scale + f90 * bias`.
+ * `scale` is returned in the X component and `bias` in the Y component. */
+vec4 ggx_brdf_split_sum(vec3 lut_coord)
+{
+  /* Squaring for perceptually linear roughness, see [Physically Based Shading at Disney]
+   * (https://media.disneyanimation.com/uploads/production/publication_asset/48/asset/s2012_pbs_disney_brdf_notes_v3.pdf)
+   * Section 5.4. */
+  float alpha = square(lut_coord.x);
+  float alpha_sq = square(alpha);
+
+  /* View vector lying in the XZ plane, with its cosine clamped away from 0 and 1. */
+  float cos_view = clamp(1.0 - square(lut_coord.y), 1e-4, 0.9999);
+  vec3 view_dir = vec3(sqrt(1.0 - square(cos_view)), 0.0, cos_view);
+
+  /* Integrating BRDF. */
+  const uint sample_count = 512u * 512u;
+  float scale_sum = 0.0;
+  float bias_sum = 0.0;
+  for (uint sample_id = 0u; sample_id < sample_count; sample_id++) {
+    vec3 Xi = sample_cylinder(hammersley_2d(sample_id, sample_count));
+
+    /* Microfacet normal. */
+    vec3 H = sample_ggx(Xi, alpha, view_dir);
+    vec3 L = -reflect(view_dir, H);
+    float NL = L.z;
+    if (!(NL > 0.0)) {
+      /* Reflected direction has a non-positive Z: nothing to accumulate. */
+      continue;
+    }
+
+    /* Assuming sample visible normals, `weight = brdf * NV / (pdf * fresnel).` */
+    float weight = bxdf_ggx_smith_G1(NL, alpha_sq);
+    /* Schlick's Fresnel. */
+    float schlick = saturate(pow5f(1.0 - saturate(dot(view_dir, H))));
+    scale_sum += (1.0 - schlick) * weight;
+    bias_sum += schlick * weight;
+  }
+  scale_sum /= float(sample_count);
+  bias_sum /= float(sample_count);
+
+  return vec4(scale_sum, bias_sum, 0.0, 0.0);
+}
+
+/* Generate BSDF LUT for `IOR < 1`. Returns the transmittance and the reflectance.
+ * Parametrizing with `x = ior^2`, `y` = view angle cosine remapped around the critical angle
+ * and `z = sqrt(roughness)`.
+ * The transmittance is stored in the X component and the reflectance in the Y component. */
+vec4 ggx_btdf_split_sum(vec3 lut_coord)
+{
+  float ior = sqrt(lut_coord.x);
+  /* ior is sin of critical angle. */
+  float critical_cos = sqrt(1.0 - saturate(square(ior)));
+
+  lut_coord.y = lut_coord.y * 2.0 - 1.0;
+  /* Maximize texture usage on both sides of the critical angle. */
+  lut_coord.y *= (lut_coord.y > 0.0) ? (1.0 - critical_cos) : critical_cos;
+  /* Center LUT around critical angle to avoid strange interpolation issues when the critical
+   * angle is changing. */
+  lut_coord.y += critical_cos;
+  float NV = clamp(lut_coord.y, 1e-4, 0.9999);
+
+  /* Squaring for perceptually linear roughness, see [Physically Based Shading at Disney]
+   * (https://media.disneyanimation.com/uploads/production/publication_asset/48/asset/s2012_pbs_disney_brdf_notes_v3.pdf)
+   * Section 5.4. */
+  float roughness = square(lut_coord.z);
+  float roughness_sq = square(roughness);
+
+  vec3 V = vec3(sqrt(1.0 - square(NV)), 0.0, NV);
+
+  /* Integrating BSDF */
+  float transmittance = 0.0;
+  float reflectance = 0.0;
+
+  const uint sample_count = 512u * 512u;
+  for (uint i = 0u; i < sample_count; i++) {
+    vec2 rand = hammersley_2d(i, sample_count);
+    vec3 Xi = sample_cylinder(rand);
+
+    /* Microfacet normal. */
+    vec3 H = sample_ggx(Xi, roughness, V);
+    float fresnel = F_eta(ior, dot(V, H));
+
+    /* Reflection. */
+    vec3 R = -reflect(V, H);
+    float NR = R.z;
+    if (NR > 0.0) {
+      /* Assuming sample visible normals, accumulating `brdf * NV / pdf.` */
+      reflectance += fresnel * bxdf_ggx_smith_G1(NR, roughness_sq);
+    }
+
+    /* Refraction. */
+    vec3 T = refract(-V, H, ior);
+    float NT = T.z;
+    /* In the case of TIR, `T == vec3(0)`, which fails the test below. */
+    if (NT < 0.0) {
+      /* Assuming sample visible normals, accumulating `btdf * NV / pdf.` */
+      transmittance += (1.0 - fresnel) * bxdf_ggx_smith_G1(NT, roughness_sq);
+    }
+  }
+  transmittance /= float(sample_count);
+  reflectance /= float(sample_count);
+
+  /* There is room to store a multi-scatter result (which is a little bit different still)
+   * and / or lobe fitting for better sampling in the two unused components. */
+  return vec4(transmittance, reflectance, 0.0, 0.0);
+}
+
+/* Compute one texel of the table selected by `table_type` and store it into `table_img`. */
+void main()
+{
+  ivec3 texel = ivec3(gl_GlobalInvocationID);
+  /* The dispatch size is rounded up to a whole number of work-groups. Skip the threads that
+   * fall outside of the table instead of integrating a texel that does not exist. */
+  if (any(greaterThanEqual(texel, table_extent))) {
+    return;
+  }
+  /* Make sure coordinates are covering the whole [0..1] range at texel center.
+   * A dimension of size 1 maps to 0 instead of dividing by zero. */
+  vec3 lut_normalized_coordinate = vec3(texel) / vec3(max(table_extent - 1, ivec3(1)));
+  /* Make sure missing cases are noticeable. */
+  vec4 result = vec4(-1);
+  switch (uint(table_type)) {
+    case LUT_GGX_BRDF_SPLIT_SUM:
+      result = ggx_brdf_split_sum(lut_normalized_coordinate);
+      break;
+    case LUT_GGX_BTDF_SPLIT_SUM:
+      result = ggx_btdf_split_sum(lut_normalized_coordinate);
+      break;
+  }
+  imageStore(table_img, texel, result);
+}
--- a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl
+++ b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl
@@ -256,23 +256,6 @@ float nodetree_thickness();
 vec4 closure_to_rgba(Closure cl);
 #endif

-/* Fresnel monochromatic, perfect mirror */
-float F_eta(float eta, float cos_theta)
-{
-  /* Compute fresnel reflectance without explicitly computing
-   * the refracted direction. */
-  float c = abs(cos_theta);
-  float g = eta * eta - 1.0 + c * c;
-  if (g > 0.0) {
-    g = sqrt(g);
-    float A = (g - c) / (g + c);
-    float B = (c * (g + c) - 1.0) / (c * (g - c) + 1.0);
-    return 0.5 * A * A * (1.0 + B * B);
-  }
-  /* Total internal reflections. */
-  return 1.0;
-}
-
 /* Simplified form of F_eta(eta, 1.0). */
 float F0_from_ior(float eta)
 {
--- a/source/blender/draw/engines/eevee_next/shaders/infos/eevee_lut_info.hh
+++ b/source/blender/draw/engines/eevee_next/shaders/infos/eevee_lut_info.hh
@@ -0,0 +1,14 @@
+/* SPDX-FileCopyrightText: 2023 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "eevee_defines.hh"
+
+GPU_SHADER_CREATE_INFO(eevee_lut)
+    .local_group_size(LUT_WORKGROUP_SIZE, LUT_WORKGROUP_SIZE, 1)
+    .push_constant(Type::INT, "table_type") /* `LookUpTableType` value selecting the table. */
+    .push_constant(Type::IVEC3, "table_extent") /* Size of the table in texels. */
+    .image(0, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_3D, "table_img")
+    .additional_info("eevee_shared")
+    .compute_source("eevee_lut_comp.glsl")
+    .do_static_compilation(true);
--- a/source/blender/gpu/CMakeLists.txt
+++ b/source/blender/gpu/CMakeLists.txt
@@ -652,6 +652,7 @@ set(SRC_SHADER_CREATE_INFOS
  ../draw/engines/eevee_next/shaders/infos/eevee_hiz_info.hh
  ../draw/engines/eevee_next/shaders/infos/eevee_irradiance_cache_info.hh
  ../draw/engines/eevee_next/shaders/infos/eevee_light_culling_info.hh
+  ../draw/engines/eevee_next/shaders/infos/eevee_lut_info.hh
  ../draw/engines/eevee_next/shaders/infos/eevee_material_info.hh
  ../draw/engines/eevee_next/shaders/infos/eevee_motion_blur_info.hh
  ../draw/engines/eevee_next/shaders/infos/eevee_reflection_probe_info.hh