Merge branch 'blender-v4.2-release'

This commit is contained in:
Jeroen Bakker
2024-06-18 10:55:24 +02:00
8 changed files with 147 additions and 54 deletions

View File

@@ -365,6 +365,7 @@ set(GLSL_SRC
engines/eevee_next/shaders/eevee_display_lightprobe_planar_vert.glsl
engines/eevee_next/shaders/eevee_display_lightprobe_sphere_frag.glsl
engines/eevee_next/shaders/eevee_display_lightprobe_sphere_vert.glsl
engines/eevee_next/shaders/eevee_film_copy_frag.glsl
engines/eevee_next/shaders/eevee_film_comp.glsl
engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl
engines/eevee_next/shaders/eevee_film_frag.glsl

View File

@@ -15,6 +15,7 @@
#include "BLI_hash.h"
#include "BLI_rect.h"
#include "GPU_debug.hh"
#include "GPU_framebuffer.hh"
#include "GPU_texture.hh"
@@ -436,63 +437,36 @@ void Film::init(const int2 &extent, const rcti *output_rect)
void Film::sync()
{
/* We use a fragment shader for viewport because we need to output the depth. */
bool use_compute = (inst_.is_viewport() == false);
/* We use a fragment shader for viewport because we need to output the depth.
*
* Compute shader is also used to work around Metal/Intel iGPU issues concerning
* read write support for array textures. In this case the copy_ps_ is used to
* copy the right color/value to the framebuffer. */
use_compute_ = !inst_.is_viewport() ||
GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_ANY);
eShaderType shader = use_compute ? FILM_COMP : FILM_FRAG;
eShaderType shader = use_compute_ ? FILM_COMP : FILM_FRAG;
/* TODO(fclem): Shader variation for panoramic & scaled resolution. */
RenderBuffers &rbuffers = inst_.render_buffers;
VelocityModule &velocity = inst_.velocity;
GPUSamplerState filter = {GPU_SAMPLER_FILTERING_LINEAR};
/* For viewport, only previous motion is supported.
* Still bind previous step to avoid undefined behavior. */
eVelocityStep step_next = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT;
GPUShader *sh = inst_.shaders.static_shader_get(shader);
accumulate_ps_.init();
accumulate_ps_.specialize_constant(sh, "enabled_categories", uint(enabled_categories_));
accumulate_ps_.specialize_constant(sh, "samples_len", &data_.samples_len);
accumulate_ps_.specialize_constant(sh, "use_reprojection", &use_reprojection_);
accumulate_ps_.specialize_constant(sh, "scaling_factor", data_.scaling_factor);
accumulate_ps_.specialize_constant(sh, "combined_id", &data_.combined_id);
accumulate_ps_.specialize_constant(sh, "display_id", &data_.display_id);
accumulate_ps_.specialize_constant(sh, "normal_id", &data_.normal_id);
accumulate_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS);
accumulate_ps_.shader_set(sh);
accumulate_ps_.bind_resources(inst_.uniform_data);
accumulate_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
accumulate_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
accumulate_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[step_next]));
accumulate_ps_.bind_texture("depth_tx", &rbuffers.depth_tx);
accumulate_ps_.bind_texture("combined_tx", &combined_final_tx_);
accumulate_ps_.bind_texture("vector_tx", &rbuffers.vector_tx);
accumulate_ps_.bind_texture("rp_color_tx", &rbuffers.rp_color_tx);
accumulate_ps_.bind_texture("rp_value_tx", &rbuffers.rp_value_tx);
accumulate_ps_.bind_texture("cryptomatte_tx", &rbuffers.cryptomatte_tx);
/* NOTE(@fclem): 16 is the max number of sampled texture in many implementations.
* If we need more, we need to pack more of the similar passes in the same textures as arrays or
* use image binding instead. */
accumulate_ps_.bind_image("in_weight_img", &weight_tx_.current());
accumulate_ps_.bind_image("out_weight_img", &weight_tx_.next());
accumulate_ps_.bind_texture("in_combined_tx", &combined_tx_.current(), filter);
accumulate_ps_.bind_image("out_combined_img", &combined_tx_.next());
accumulate_ps_.bind_image("depth_img", &depth_tx_);
accumulate_ps_.bind_image("color_accum_img", &color_accum_tx_);
accumulate_ps_.bind_image("value_accum_img", &value_accum_tx_);
accumulate_ps_.bind_image("cryptomatte_img", &cryptomatte_tx_);
init_pass(accumulate_ps_, sh);
/* Sync with rendering passes. */
accumulate_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
if (use_compute) {
if (use_compute_) {
accumulate_ps_.dispatch(int3(math::divide_ceil(data_.extent, int2(FILM_GROUP_SIZE)), 1));
}
else {
accumulate_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
}
copy_ps_.init();
if (use_compute_ && inst_.is_viewport()) {
init_pass(copy_ps_, inst_.shaders.static_shader_get(FILM_COPY));
copy_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
}
const int cryptomatte_layer_count = cryptomatte_layer_len_get();
const bool is_cryptomatte_pass_enabled = cryptomatte_layer_count > 0;
const bool do_cryptomatte_sorting = inst_.is_viewport() == false;
@@ -511,6 +485,49 @@ void Film::sync()
}
}
/**
 * Record the setup shared by the film passes into `pass`: specialization constants, draw state,
 * shader, and all UBO / texture / image bindings used by the film shaders.
 *
 * Only `pass` is written to, so the same helper serves both `accumulate_ps_` and `copy_ps_`.
 * `Film::sync()` calls `init()` on the pass before calling this function.
 *
 * \param pass: Pass that receives the state and resource bindings.
 * \param sh: Shader to set on the pass and to specialize the constants for.
 */
void Film::init_pass(PassSimple &pass, GPUShader *sh)
{
  GPUSamplerState filter = {GPU_SAMPLER_FILTERING_LINEAR};
  RenderBuffers &rbuffers = inst_.render_buffers;
  VelocityModule &velocity = inst_.velocity;

  pass.specialize_constant(sh, "enabled_categories", uint(enabled_categories_));
  pass.specialize_constant(sh, "samples_len", &data_.samples_len);
  pass.specialize_constant(sh, "use_reprojection", &use_reprojection_);
  pass.specialize_constant(sh, "scaling_factor", data_.scaling_factor);
  pass.specialize_constant(sh, "combined_id", &data_.combined_id);
  pass.specialize_constant(sh, "display_id", &data_.display_id);
  pass.specialize_constant(sh, "normal_id", &data_.normal_id);
  pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS);
  pass.shader_set(sh);

  /* For viewport, only previous motion is supported.
   * Still bind previous step to avoid undefined behavior. */
  eVelocityStep step_next = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT;

  /* Uniform data is bound once, on the pass being initialized. */
  pass.bind_resources(inst_.uniform_data);
  pass.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
  pass.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
  pass.bind_ubo("camera_next", &(*velocity.camera_steps[step_next]));
  pass.bind_texture("depth_tx", &rbuffers.depth_tx);
  pass.bind_texture("combined_tx", &combined_final_tx_);
  pass.bind_texture("vector_tx", &rbuffers.vector_tx);
  pass.bind_texture("rp_color_tx", &rbuffers.rp_color_tx);
  pass.bind_texture("rp_value_tx", &rbuffers.rp_value_tx);
  pass.bind_texture("cryptomatte_tx", &rbuffers.cryptomatte_tx);
  /* NOTE(@fclem): 16 is the max number of sampled texture in many implementations.
   * If we need more, we need to pack more of the similar passes in the same textures as arrays or
   * use image binding instead. */
  pass.bind_image("in_weight_img", &weight_tx_.current());
  pass.bind_image("out_weight_img", &weight_tx_.next());
  pass.bind_texture("in_combined_tx", &combined_tx_.current(), filter);
  pass.bind_image("out_combined_img", &combined_tx_.next());
  pass.bind_image("depth_img", &depth_tx_);
  pass.bind_image("color_accum_img", &color_accum_tx_);
  pass.bind_image("value_accum_img", &value_accum_tx_);
  pass.bind_image("cryptomatte_img", &cryptomatte_tx_);
}
void Film::end_sync()
{
use_reprojection_ = inst_.sampling.interactive_mode();
@@ -678,6 +695,7 @@ void Film::accumulate(View &view, GPUTexture *combined_final_tx)
inst_.uniform_data.push_update();
inst_.manager->submit(accumulate_ps_, view);
inst_.manager->submit(copy_ps_, view);
combined_tx_.swap();
weight_tx_.swap();

View File

@@ -55,6 +55,9 @@ class Film {
/** Incoming combined buffer with post FX applied (motion blur + depth of field). */
GPUTexture *combined_final_tx_ = nullptr;
/** Are we using the compute shader/pipeline. Assigned by `sync()`. */
bool use_compute_ = false;
/**
* Main accumulation textures containing every render-pass except depth, cryptomatte and
* combined.
@@ -71,6 +74,7 @@ class Film {
SwapChain<Texture, 2> weight_tx_;
PassSimple accumulate_ps_ = {"Film.Accumulate"};
PassSimple copy_ps_ = {"Film.Copy"};
PassSimple cryptomatte_post_ps_ = {"Film.Cryptomatte.Post"};
FilmData &data_;
@@ -296,6 +300,8 @@ class Film {
* Precompute sample weights if they are uniform across the whole film extent.
*/
void update_sample_table();
/**
 * Set up `pass` for film processing with shader `sh`: specialization constants, draw state and
 * the film resource bindings (camera UBOs, render-buffer textures, film images).
 */
void init_pass(PassSimple &pass, GPUShader *sh);
};
/** \} */

View File

@@ -111,12 +111,14 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
switch (shader_type) {
case AMBIENT_OCCLUSION_PASS:
return "eevee_ambient_occlusion_pass";
case FILM_FRAG:
return "eevee_film_frag";
case FILM_COPY:
return "eevee_film_copy_frag";
case FILM_COMP:
return "eevee_film_comp";
case FILM_CRYPTOMATTE_POST:
return "eevee_film_cryptomatte_post";
case FILM_FRAG:
return "eevee_film_frag";
case DEFERRED_COMBINE:
return "eevee_deferred_combine";
case DEFERRED_LIGHT_SINGLE:

View File

@@ -28,9 +28,10 @@ namespace blender::eevee {
enum eShaderType {
AMBIENT_OCCLUSION_PASS = 0,
FILM_FRAG,
FILM_COPY,
FILM_COMP,
FILM_CRYPTOMATTE_POST,
FILM_FRAG,
DEFERRED_CAPTURE_EVAL,
DEFERRED_COMBINE,

View File

@@ -0,0 +1,48 @@
/* SPDX-FileCopyrightText: 2022 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/**
 * `eevee_film_copy_frag` is used to work around Metal/Intel iGPU issues.
 *
 * Caches are not flushed in the `eevee_film_frag` shader due to unsupported read/write access,
 * so the `eevee_film_comp` shader is scheduled instead. This shader has the film resources
 * attached read-only and does the part of `eevee_film_frag` that the compute shader does not:
 * writing the displayed color and the depth to the frame-buffer.
 *
 * Code is duplicated here to ensure that the compiler will pass read/write resource checks.
 */
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
/* Turn a cryptomatte hash into an opaque display color.
 * Red is the hash value itself; green and blue are the hash bits shifted left by 8 and 16,
 * divided by the largest 32-bit unsigned value. */
vec4 cryptomatte_false_color(float hash)
{
  float uint_max = float(0xFFFFFFFFu);
  uint hash_bits = floatBitsToUint(hash);
  float green = float(hash_bits << 8) / uint_max;
  float blue = float(hash_bits << 16) / uint_max;
  return vec4(hash, green, blue, 1.0);
}
/* Write the pass selected for display to `out_color` and the film depth to `gl_FragDepth`. */
void main()
{
  ivec2 texel = ivec2(gl_FragCoord.xy);
  ivec3 layer_texel = ivec3(texel, display_id);

  vec4 display_color;
  if (display_id == -1) {
    /* No pass layer selected: show the combined buffer.
     * NOTE(review): this fetches from `in_combined_tx`; confirm that this is the intended
     * source rather than `out_combined_img`. */
    display_color = texelFetch(in_combined_tx, texel, 0);
  }
  else if (uniform_buf.film.display_storage_type == PASS_STORAGE_VALUE) {
    /* Single channel pass: replicate the value on RGB, opaque alpha. */
    float value = imageLoad(value_accum_img, layer_texel).r;
    display_color = vec4(vec3(value), 1.0);
  }
  else if (uniform_buf.film.display_storage_type == PASS_STORAGE_COLOR) {
    display_color = imageLoad(color_accum_img, layer_texel);
  }
  else /* PASS_STORAGE_CRYPTOMATTE */ {
    float hash = imageLoad(cryptomatte_img, layer_texel).r;
    display_color = cryptomatte_false_color(hash);
  }
  out_color = display_color;

  /* The stored depth is negated and converted with `drw_depth_view_to_screen`. */
  float stored_depth = imageLoad(depth_img, texel).r;
  float screen_depth = drw_depth_view_to_screen(-stored_depth);
  /* Offset by a small constant plus the depth's screen-space derivative before clamping. */
  screen_depth += 2.4e-7 * 4.0 + fwidth(screen_depth);
  gl_FragDepth = saturate(screen_depth);
}

View File

@@ -5,7 +5,7 @@
#include "eevee_defines.hh"
#include "gpu_shader_create_info.hh"
GPU_SHADER_CREATE_INFO(eevee_film)
GPU_SHADER_CREATE_INFO(eevee_film_base)
.sampler(0, ImageType::DEPTH_2D, "depth_tx")
.sampler(1, ImageType::FLOAT_2D, "combined_tx")
.sampler(2, ImageType::FLOAT_2D, "vector_tx")
@@ -16,13 +16,6 @@ GPU_SHADER_CREATE_INFO(eevee_film)
.sampler(6, ImageType::FLOAT_2D, "cryptomatte_tx")
.image(0, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "in_weight_img")
.image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D_ARRAY, "out_weight_img")
/* Color History for TAA needs to be sampler to leverage bilinear sampling. */
//.image(2, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_combined_img")
.image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_combined_img")
.image(4, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "depth_img")
.image(5, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "color_accum_img")
.image(6, GPU_R16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "value_accum_img")
.image(7, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img")
.specialization_constant(Type::UINT, "enabled_categories", 0)
.specialization_constant(Type::INT, "samples_len", 0)
.specialization_constant(Type::BOOL, "use_reprojection", false)
@@ -35,6 +28,16 @@ GPU_SHADER_CREATE_INFO(eevee_film)
.additional_info("eevee_velocity_camera")
.additional_info("draw_view");
/* Film resources with write access: adds the output / accumulation images (slots 3 to 7) on top
 * of the samplers, weight images and specialization constants declared by `eevee_film_base`. */
GPU_SHADER_CREATE_INFO(eevee_film)
/* Color history for TAA needs to be a sampler to leverage bilinear sampling,
 * hence image slot 2 is left unused. */
//.image(2, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_combined_img")
.image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_combined_img")
.image(4, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "depth_img")
.image(5, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "color_accum_img")
.image(6, GPU_R16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "value_accum_img")
.image(7, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img")
.additional_info("eevee_film_base");
GPU_SHADER_CREATE_INFO(eevee_film_frag)
.do_static_compilation(true)
.fragment_out(0, Type::VEC4, "out_color")
@@ -57,3 +60,16 @@ GPU_SHADER_CREATE_INFO(eevee_film_cryptomatte_post)
.local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE)
.compute_source("eevee_film_cryptomatte_post_comp.glsl")
.additional_info("eevee_shared");
/* Create-info for the `FILM_COPY` shader (`eevee_film_copy_frag.glsl`).
 * Declares the same image slots and names as `eevee_film`, but all with `Qualifier::READ`,
 * so the bindings recorded by `Film::init_pass` apply to this shader as well. */
GPU_SHADER_CREATE_INFO(eevee_film_copy_frag)
.do_static_compilation(true)
/* Read-only counterparts of the `eevee_film` images (slots 3 to 7). */
.image(3, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "out_combined_img")
.image(4, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D, "depth_img")
.image(5, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "color_accum_img")
.image(6, GPU_R16F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "value_accum_img")
.image(7, GPU_RGBA32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img")
/* The fragment shader writes `gl_FragDepth`. */
.depth_write(DepthWrite::ANY)
.fragment_out(0, Type::VEC4, "out_color")
.fragment_source("eevee_film_copy_frag.glsl")
.define("FILM_COPY")
.additional_info("draw_fullscreen", "eevee_film_base");

View File

@@ -207,6 +207,7 @@ void MTLBackend::platform_init(MTLContext *ctx)
else if (strstr(vendor, "Intel")) {
device = GPU_DEVICE_INTEL;
driver = GPU_DRIVER_OFFICIAL;
support_level = GPU_SUPPORT_LEVEL_LIMITED;
}
else if (strstr(vendor, "Apple") || strstr(vendor, "APPLE")) {
/* Apple Silicon. */