Merge branch 'blender-v4.2-release'
This commit is contained in:
@@ -365,6 +365,7 @@ set(GLSL_SRC
|
||||
engines/eevee_next/shaders/eevee_display_lightprobe_planar_vert.glsl
|
||||
engines/eevee_next/shaders/eevee_display_lightprobe_sphere_frag.glsl
|
||||
engines/eevee_next/shaders/eevee_display_lightprobe_sphere_vert.glsl
|
||||
engines/eevee_next/shaders/eevee_film_copy_frag.glsl
|
||||
engines/eevee_next/shaders/eevee_film_comp.glsl
|
||||
engines/eevee_next/shaders/eevee_film_cryptomatte_post_comp.glsl
|
||||
engines/eevee_next/shaders/eevee_film_frag.glsl
|
||||
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "BLI_hash.h"
|
||||
#include "BLI_rect.h"
|
||||
|
||||
#include "GPU_debug.hh"
|
||||
#include "GPU_framebuffer.hh"
|
||||
#include "GPU_texture.hh"
|
||||
|
||||
@@ -436,63 +437,36 @@ void Film::init(const int2 &extent, const rcti *output_rect)
|
||||
|
||||
void Film::sync()
|
||||
{
|
||||
/* We use a fragment shader for viewport because we need to output the depth. */
|
||||
bool use_compute = (inst_.is_viewport() == false);
|
||||
/* We use a fragment shader for viewport because we need to output the depth.
|
||||
*
|
||||
* Compute shader is also used to work around Metal/Intel iGPU issues concerning
|
||||
* read write support for array textures. In this case the copy_ps_ is used to
|
||||
* copy the right color/value to the framebuffer. */
|
||||
use_compute_ = !inst_.is_viewport() ||
|
||||
GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_MAC, GPU_DRIVER_ANY);
|
||||
|
||||
eShaderType shader = use_compute ? FILM_COMP : FILM_FRAG;
|
||||
eShaderType shader = use_compute_ ? FILM_COMP : FILM_FRAG;
|
||||
|
||||
/* TODO(fclem): Shader variation for panoramic & scaled resolution. */
|
||||
|
||||
RenderBuffers &rbuffers = inst_.render_buffers;
|
||||
VelocityModule &velocity = inst_.velocity;
|
||||
|
||||
GPUSamplerState filter = {GPU_SAMPLER_FILTERING_LINEAR};
|
||||
|
||||
/* For viewport, only previous motion is supported.
|
||||
* Still bind previous step to avoid undefined behavior. */
|
||||
eVelocityStep step_next = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT;
|
||||
|
||||
GPUShader *sh = inst_.shaders.static_shader_get(shader);
|
||||
accumulate_ps_.init();
|
||||
accumulate_ps_.specialize_constant(sh, "enabled_categories", uint(enabled_categories_));
|
||||
accumulate_ps_.specialize_constant(sh, "samples_len", &data_.samples_len);
|
||||
accumulate_ps_.specialize_constant(sh, "use_reprojection", &use_reprojection_);
|
||||
accumulate_ps_.specialize_constant(sh, "scaling_factor", data_.scaling_factor);
|
||||
accumulate_ps_.specialize_constant(sh, "combined_id", &data_.combined_id);
|
||||
accumulate_ps_.specialize_constant(sh, "display_id", &data_.display_id);
|
||||
accumulate_ps_.specialize_constant(sh, "normal_id", &data_.normal_id);
|
||||
accumulate_ps_.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS);
|
||||
accumulate_ps_.shader_set(sh);
|
||||
accumulate_ps_.bind_resources(inst_.uniform_data);
|
||||
accumulate_ps_.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
|
||||
accumulate_ps_.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
|
||||
accumulate_ps_.bind_ubo("camera_next", &(*velocity.camera_steps[step_next]));
|
||||
accumulate_ps_.bind_texture("depth_tx", &rbuffers.depth_tx);
|
||||
accumulate_ps_.bind_texture("combined_tx", &combined_final_tx_);
|
||||
accumulate_ps_.bind_texture("vector_tx", &rbuffers.vector_tx);
|
||||
accumulate_ps_.bind_texture("rp_color_tx", &rbuffers.rp_color_tx);
|
||||
accumulate_ps_.bind_texture("rp_value_tx", &rbuffers.rp_value_tx);
|
||||
accumulate_ps_.bind_texture("cryptomatte_tx", &rbuffers.cryptomatte_tx);
|
||||
/* NOTE(@fclem): 16 is the max number of sampled texture in many implementations.
|
||||
* If we need more, we need to pack more of the similar passes in the same textures as arrays or
|
||||
* use image binding instead. */
|
||||
accumulate_ps_.bind_image("in_weight_img", &weight_tx_.current());
|
||||
accumulate_ps_.bind_image("out_weight_img", &weight_tx_.next());
|
||||
accumulate_ps_.bind_texture("in_combined_tx", &combined_tx_.current(), filter);
|
||||
accumulate_ps_.bind_image("out_combined_img", &combined_tx_.next());
|
||||
accumulate_ps_.bind_image("depth_img", &depth_tx_);
|
||||
accumulate_ps_.bind_image("color_accum_img", &color_accum_tx_);
|
||||
accumulate_ps_.bind_image("value_accum_img", &value_accum_tx_);
|
||||
accumulate_ps_.bind_image("cryptomatte_img", &cryptomatte_tx_);
|
||||
init_pass(accumulate_ps_, sh);
|
||||
/* Sync with rendering passes. */
|
||||
accumulate_ps_.barrier(GPU_BARRIER_TEXTURE_FETCH | GPU_BARRIER_SHADER_IMAGE_ACCESS);
|
||||
if (use_compute) {
|
||||
if (use_compute_) {
|
||||
accumulate_ps_.dispatch(int3(math::divide_ceil(data_.extent, int2(FILM_GROUP_SIZE)), 1));
|
||||
}
|
||||
else {
|
||||
accumulate_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
|
||||
}
|
||||
|
||||
copy_ps_.init();
|
||||
if (use_compute_ && inst_.is_viewport()) {
|
||||
init_pass(copy_ps_, inst_.shaders.static_shader_get(FILM_COPY));
|
||||
copy_ps_.draw_procedural(GPU_PRIM_TRIS, 1, 3);
|
||||
}
|
||||
|
||||
const int cryptomatte_layer_count = cryptomatte_layer_len_get();
|
||||
const bool is_cryptomatte_pass_enabled = cryptomatte_layer_count > 0;
|
||||
const bool do_cryptomatte_sorting = inst_.is_viewport() == false;
|
||||
@@ -511,6 +485,49 @@ void Film::sync()
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Record the shader, specialization constants, draw state and resource bindings shared by the
 * film passes into `pass`.
 *
 * This does not call `pass.init()` and does not record any draw or dispatch command; both are
 * left to the caller.
 *
 * \param pass: Pass that receives the state and bindings.
 * \param sh: Shader to set on `pass`; also the shader whose constants are specialized.
 */
void Film::init_pass(PassSimple &pass, GPUShader *sh)
{
  GPUSamplerState filter = {GPU_SAMPLER_FILTERING_LINEAR};
  RenderBuffers &rbuffers = inst_.render_buffers;
  VelocityModule &velocity = inst_.velocity;

  pass.specialize_constant(sh, "enabled_categories", uint(enabled_categories_));
  pass.specialize_constant(sh, "samples_len", &data_.samples_len);
  pass.specialize_constant(sh, "use_reprojection", &use_reprojection_);
  pass.specialize_constant(sh, "scaling_factor", data_.scaling_factor);
  pass.specialize_constant(sh, "combined_id", &data_.combined_id);
  pass.specialize_constant(sh, "display_id", &data_.display_id);
  pass.specialize_constant(sh, "normal_id", &data_.normal_id);
  pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS);
  pass.shader_set(sh);
  /* For viewport, only previous motion is supported.
   * Still bind previous step to avoid undefined behavior. */
  eVelocityStep step_next = inst_.is_viewport() ? STEP_PREVIOUS : STEP_NEXT;

  pass.bind_resources(inst_.uniform_data);
  pass.bind_ubo("camera_prev", &(*velocity.camera_steps[STEP_PREVIOUS]));
  pass.bind_ubo("camera_curr", &(*velocity.camera_steps[STEP_CURRENT]));
  pass.bind_ubo("camera_next", &(*velocity.camera_steps[step_next]));
  pass.bind_texture("depth_tx", &rbuffers.depth_tx);
  pass.bind_texture("combined_tx", &combined_final_tx_);
  pass.bind_texture("vector_tx", &rbuffers.vector_tx);
  pass.bind_texture("rp_color_tx", &rbuffers.rp_color_tx);
  pass.bind_texture("rp_value_tx", &rbuffers.rp_value_tx);
  pass.bind_texture("cryptomatte_tx", &rbuffers.cryptomatte_tx);
  /* NOTE(@fclem): 16 is the max number of sampled texture in many implementations.
   * If we need more, we need to pack more of the similar passes in the same textures as arrays or
   * use image binding instead. */
  pass.bind_image("in_weight_img", &weight_tx_.current());
  pass.bind_image("out_weight_img", &weight_tx_.next());
  pass.bind_texture("in_combined_tx", &combined_tx_.current(), filter);
  pass.bind_image("out_combined_img", &combined_tx_.next());
  pass.bind_image("depth_img", &depth_tx_);
  pass.bind_image("color_accum_img", &color_accum_tx_);
  pass.bind_image("value_accum_img", &value_accum_tx_);
  pass.bind_image("cryptomatte_img", &cryptomatte_tx_);
  /* Only `pass` is touched here: the uniform data is already bound on it above, so no other
   * pass (e.g. `copy_ps_`) must be referenced from this shared helper. */
}
|
||||
|
||||
void Film::end_sync()
|
||||
{
|
||||
use_reprojection_ = inst_.sampling.interactive_mode();
|
||||
@@ -678,6 +695,7 @@ void Film::accumulate(View &view, GPUTexture *combined_final_tx)
|
||||
inst_.uniform_data.push_update();
|
||||
|
||||
inst_.manager->submit(accumulate_ps_, view);
|
||||
inst_.manager->submit(copy_ps_, view);
|
||||
|
||||
combined_tx_.swap();
|
||||
weight_tx_.swap();
|
||||
|
||||
@@ -55,6 +55,9 @@ class Film {
|
||||
/** Incoming combined buffer with post FX applied (motion blur + depth of field). */
|
||||
GPUTexture *combined_final_tx_ = nullptr;
|
||||
|
||||
/** Are we using the compute shader/pipeline. */
|
||||
bool use_compute_;
|
||||
|
||||
/**
|
||||
* Main accumulation textures containing every render-pass except depth, cryptomatte and
|
||||
* combined.
|
||||
@@ -71,6 +74,7 @@ class Film {
|
||||
SwapChain<Texture, 2> weight_tx_;
|
||||
|
||||
PassSimple accumulate_ps_ = {"Film.Accumulate"};
|
||||
PassSimple copy_ps_ = {"Film.Copy"};
|
||||
PassSimple cryptomatte_post_ps_ = {"Film.Cryptomatte.Post"};
|
||||
|
||||
FilmData &data_;
|
||||
@@ -296,6 +300,8 @@ class Film {
|
||||
* Precompute sample weights if they are uniform across the whole film extent.
|
||||
*/
|
||||
void update_sample_table();
|
||||
|
||||
void init_pass(PassSimple &pass, GPUShader *sh);
|
||||
};
|
||||
|
||||
/** \} */
|
||||
|
||||
@@ -111,12 +111,14 @@ const char *ShaderModule::static_shader_create_info_name_get(eShaderType shader_
|
||||
switch (shader_type) {
|
||||
case AMBIENT_OCCLUSION_PASS:
|
||||
return "eevee_ambient_occlusion_pass";
|
||||
case FILM_FRAG:
|
||||
return "eevee_film_frag";
|
||||
case FILM_COPY:
|
||||
return "eevee_film_copy_frag";
|
||||
case FILM_COMP:
|
||||
return "eevee_film_comp";
|
||||
case FILM_CRYPTOMATTE_POST:
|
||||
return "eevee_film_cryptomatte_post";
|
||||
case FILM_FRAG:
|
||||
return "eevee_film_frag";
|
||||
case DEFERRED_COMBINE:
|
||||
return "eevee_deferred_combine";
|
||||
case DEFERRED_LIGHT_SINGLE:
|
||||
|
||||
@@ -28,9 +28,10 @@ namespace blender::eevee {
|
||||
enum eShaderType {
|
||||
AMBIENT_OCCLUSION_PASS = 0,
|
||||
|
||||
FILM_FRAG,
|
||||
FILM_COPY,
|
||||
FILM_COMP,
|
||||
FILM_CRYPTOMATTE_POST,
|
||||
FILM_FRAG,
|
||||
|
||||
DEFERRED_CAPTURE_EVAL,
|
||||
DEFERRED_COMBINE,
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
/* SPDX-FileCopyrightText: 2022 Blender Authors
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
/**
 * `eevee_film_copy_frag` is used to work around Metal/Intel iGPU issues.
 *
 * Caches are not flushed in the eevee_film_frag shader due to unsupported read/write access.
 * We schedule the eevee_film_comp shader instead. In this shader the resources are attached
 * read only, and it does the part that is missing from the eevee_film_frag shader.
 *
 * Code is duplicated here to ensure that the compiler will pass read/write resource checks.
 */
|
||||
#pragma BLENDER_REQUIRE(draw_view_lib.glsl)
|
||||
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
|
||||
|
||||
/* Turn a cryptomatte hash into an opaque display color.
 * Red is the hash value itself; green and blue are derived from the hash bit pattern shifted
 * left by 8 and 16 bits, normalized by the maximum 32-bit unsigned value. */
vec4 cryptomatte_false_color(float hash)
{
  uint hash_bits = floatBitsToUint(hash);
  float green = float(hash_bits << 8) / float(0xFFFFFFFFu);
  float blue = float(hash_bits << 16) / float(0xFFFFFFFFu);
  return vec4(hash, green, blue, 1.0);
}
|
||||
|
||||
/* Fragment entry point: writes the displayed pass color to `out_color` and the film depth to
 * `gl_FragDepth`, reading every film resource without writing to it. */
void main()
{
  ivec2 pixel = ivec2(gl_FragCoord.xy);

  if (display_id == -1) {
    /* No pass layer selected: show the combined buffer. */
    out_color = texelFetch(in_combined_tx, pixel, 0);
  }
  else {
    /* Address of this pixel inside the displayed layer of the accumulation arrays. */
    ivec3 layer_pixel = ivec3(pixel, display_id);

    if (uniform_buf.film.display_storage_type == PASS_STORAGE_VALUE) {
      /* Single channel pass: replicate the value as an opaque gray. */
      float value = imageLoad(value_accum_img, layer_pixel).r;
      out_color = vec4(vec3(value), 1.0);
    }
    else if (uniform_buf.film.display_storage_type == PASS_STORAGE_COLOR) {
      out_color = imageLoad(color_accum_img, layer_pixel);
    }
    else /* PASS_STORAGE_CRYPTOMATTE */ {
      float crypto_hash = imageLoad(cryptomatte_img, layer_pixel).r;
      out_color = cryptomatte_false_color(crypto_hash);
    }
  }

  /* The stored depth is negated before being converted to screen depth.
   * NOTE(review): presumably `depth_img` holds a positive view-space distance -- confirm. */
  float screen_depth = drw_depth_view_to_screen(-imageLoad(depth_img, pixel).r);
  /* Push the depth back by a small constant plus its screen-space derivative. */
  screen_depth += 2.4e-7 * 4.0 + fwidth(screen_depth);
  gl_FragDepth = saturate(screen_depth);
}
|
||||
@@ -5,7 +5,7 @@
|
||||
#include "eevee_defines.hh"
|
||||
#include "gpu_shader_create_info.hh"
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_film)
|
||||
GPU_SHADER_CREATE_INFO(eevee_film_base)
|
||||
.sampler(0, ImageType::DEPTH_2D, "depth_tx")
|
||||
.sampler(1, ImageType::FLOAT_2D, "combined_tx")
|
||||
.sampler(2, ImageType::FLOAT_2D, "vector_tx")
|
||||
@@ -16,13 +16,6 @@ GPU_SHADER_CREATE_INFO(eevee_film)
|
||||
.sampler(6, ImageType::FLOAT_2D, "cryptomatte_tx")
|
||||
.image(0, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "in_weight_img")
|
||||
.image(1, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D_ARRAY, "out_weight_img")
|
||||
/* Color History for TAA needs to be sampler to leverage bilinear sampling. */
|
||||
//.image(2, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_combined_img")
|
||||
.image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_combined_img")
|
||||
.image(4, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "depth_img")
|
||||
.image(5, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "color_accum_img")
|
||||
.image(6, GPU_R16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "value_accum_img")
|
||||
.image(7, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img")
|
||||
.specialization_constant(Type::UINT, "enabled_categories", 0)
|
||||
.specialization_constant(Type::INT, "samples_len", 0)
|
||||
.specialization_constant(Type::BOOL, "use_reprojection", false)
|
||||
@@ -35,6 +28,16 @@ GPU_SHADER_CREATE_INFO(eevee_film)
|
||||
.additional_info("eevee_velocity_camera")
|
||||
.additional_info("draw_view");
|
||||
|
||||
/* Film resources that need write access, layered on top of `eevee_film_base`.
 * Image slot 2 is left unused on purpose, see the note below. */
GPU_SHADER_CREATE_INFO(eevee_film)
    /* Color History for TAA needs to be sampler to leverage bilinear sampling. */
    //.image(2, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "in_combined_img")
    /* Write-only combined output. */
    .image(3, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_combined_img")
    /* Depth and accumulation buffers, read and written by the same shader. */
    .image(4, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "depth_img")
    .image(5, GPU_RGBA16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "color_accum_img")
    .image(6, GPU_R16F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "value_accum_img")
    .image(7, GPU_RGBA32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img")
    .additional_info("eevee_film_base");
|
||||
|
||||
GPU_SHADER_CREATE_INFO(eevee_film_frag)
|
||||
.do_static_compilation(true)
|
||||
.fragment_out(0, Type::VEC4, "out_color")
|
||||
@@ -57,3 +60,16 @@ GPU_SHADER_CREATE_INFO(eevee_film_cryptomatte_post)
|
||||
.local_group_size(FILM_GROUP_SIZE, FILM_GROUP_SIZE)
|
||||
.compute_source("eevee_film_cryptomatte_post_comp.glsl")
|
||||
.additional_info("eevee_shared");
|
||||
|
||||
/* Fullscreen fragment shader info for `eevee_film_copy_frag.glsl`.
 * It declares image slots 3 to 7 under the same names as `eevee_film`, but every one of them is
 * qualified read only. Color goes to `out_color` and the shader is allowed to write depth. */
GPU_SHADER_CREATE_INFO(eevee_film_copy_frag)
    .do_static_compilation(true)
    /* Read-only film images. */
    .image(3, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D, "out_combined_img")
    .image(4, GPU_R32F, Qualifier::READ, ImageType::FLOAT_2D, "depth_img")
    .image(5, GPU_RGBA16F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "color_accum_img")
    .image(6, GPU_R16F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "value_accum_img")
    .image(7, GPU_RGBA32F, Qualifier::READ, ImageType::FLOAT_2D_ARRAY, "cryptomatte_img")
    /* Fragment stage outputs. */
    .depth_write(DepthWrite::ANY)
    .fragment_out(0, Type::VEC4, "out_color")
    .fragment_source("eevee_film_copy_frag.glsl")
    .define("FILM_COPY")
    .additional_info("draw_fullscreen", "eevee_film_base");
|
||||
|
||||
@@ -207,6 +207,7 @@ void MTLBackend::platform_init(MTLContext *ctx)
|
||||
else if (strstr(vendor, "Intel")) {
|
||||
device = GPU_DEVICE_INTEL;
|
||||
driver = GPU_DRIVER_OFFICIAL;
|
||||
support_level = GPU_SUPPORT_LEVEL_LIMITED;
|
||||
}
|
||||
else if (strstr(vendor, "Apple") || strstr(vendor, "APPLE")) {
|
||||
/* Apple Silicon. */
|
||||
|
||||
Reference in New Issue
Block a user