diff --git a/source/blender/draw/engines/eevee/eevee_film.cc b/source/blender/draw/engines/eevee/eevee_film.cc index 43b0fc96f73..b4803419567 100644 --- a/source/blender/draw/engines/eevee/eevee_film.cc +++ b/source/blender/draw/engines/eevee/eevee_film.cc @@ -641,6 +641,10 @@ void Film::end_sync() aovs_info.push_update(); sync_mist(); + + inst_.manager->warm_shader_specialization(accumulate_ps_); + inst_.manager->warm_shader_specialization(copy_ps_); + inst_.manager->warm_shader_specialization(cryptomatte_post_ps_); } float2 Film::pixel_jitter_get() const diff --git a/source/blender/draw/engines/eevee/eevee_pipeline.cc b/source/blender/draw/engines/eevee/eevee_pipeline.cc index 80b6f372eee..0ff7d630fbc 100644 --- a/source/blender/draw/engines/eevee/eevee_pipeline.cc +++ b/source/blender/draw/engines/eevee/eevee_pipeline.cc @@ -676,17 +676,22 @@ void DeferredLayer::end_sync(bool is_first_pass, sub.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx); sub.bind_image(RBUFS_COLOR_SLOT, &inst_.render_buffers.rp_color_tx); sub.bind_image(RBUFS_VALUE_SLOT, &inst_.render_buffers.rp_value_tx); + const ShadowSceneData &shadow_scene = inst_.shadows.get_data(); + auto set_specialization_constants = + [&](PassSimple::Sub &sub, GPUShader *sh, bool use_transmission) { + sub.specialize_constant(sh, "render_pass_shadow_id", rbuf_data.shadow_id); + sub.specialize_constant(sh, "use_split_indirect", use_split_indirect); + sub.specialize_constant(sh, "use_lightprobe_eval", use_lightprobe_eval); + sub.specialize_constant(sh, "use_transmission", use_transmission); + sub.specialize_constant(sh, "shadow_ray_count", &shadow_scene.ray_count); + sub.specialize_constant(sh, "shadow_ray_step_count", &shadow_scene.step_count); + }; /* Submit the more costly ones first to avoid long tail in occupancy. * See page 78 of "SIGGRAPH 2023: Unreal Engine Substrate" by Hillaire & de Rousiers. */ + for (int i = min_ii(3, closure_count_) - 1; i >= 0; i--) { GPUShader *sh = inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i)); - sub.specialize_constant(sh, "render_pass_shadow_id", rbuf_data.shadow_id); - sub.specialize_constant(sh, "use_split_indirect", use_split_indirect); - sub.specialize_constant(sh, "use_lightprobe_eval", use_lightprobe_eval); - sub.specialize_constant(sh, "use_transmission", false); - const ShadowSceneData &shadow_scene = inst_.shadows.get_data(); - sub.specialize_constant(sh, "shadow_ray_count", &shadow_scene.ray_count); - sub.specialize_constant(sh, "shadow_ray_step_count", &shadow_scene.step_count); + set_specialization_constants(sub, sh, false); sub.shader_set(sh); sub.bind_image("direct_radiance_1_img", &direct_radiance_txs_[0]); sub.bind_image("direct_radiance_2_img", &direct_radiance_txs_[1]); @@ -709,7 +714,7 @@ void DeferredLayer::end_sync(bool is_first_pass, sub.draw_procedural(GPU_PRIM_TRIS, 1, 3); if (use_transmission) { /* Separate pass for transmission BSDF as their evaluation is quite costly. */ - sub.specialize_constant(sh, "use_transmission", true); + set_specialization_constants(sub, sh, true); sub.shader_set(sh); sub.state_stencil(0x0u, (i + 1) | uint8_t(StencilBits::TRANSMISSION), compare_mask); sub.draw_procedural(GPU_PRIM_TRIS, 1, 3); diff --git a/source/blender/draw/engines/eevee/eevee_raytrace.cc b/source/blender/draw/engines/eevee/eevee_raytrace.cc index 12608515a3d..ef44e30339f 100644 --- a/source/blender/draw/engines/eevee/eevee_raytrace.cc +++ b/source/blender/draw/engines/eevee/eevee_raytrace.cc @@ -330,6 +330,23 @@ void RayTraceModule::sync() pass.dispatch(horizon_denoise_dispatch_buf_); pass.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS); } + for (int i : IndexRange(3)) { + data_.closure_index = i; + inst_.manager->warm_shader_specialization(tile_classify_ps_); + inst_.manager->warm_shader_specialization(tile_compact_ps_); + inst_.manager->warm_shader_specialization(generate_ps_); + inst_.manager->warm_shader_specialization(trace_planar_ps_); + inst_.manager->warm_shader_specialization(trace_screen_ps_); + inst_.manager->warm_shader_specialization(trace_fallback_ps_); + inst_.manager->warm_shader_specialization(denoise_spatial_ps_); + inst_.manager->warm_shader_specialization(denoise_temporal_ps_); + inst_.manager->warm_shader_specialization(denoise_bilateral_ps_); + inst_.manager->warm_shader_specialization(horizon_schedule_ps_); + inst_.manager->warm_shader_specialization(horizon_setup_ps_); + inst_.manager->warm_shader_specialization(horizon_scan_ps_); + inst_.manager->warm_shader_specialization(horizon_denoise_ps_); + inst_.manager->warm_shader_specialization(horizon_resolve_ps_); + } } void RayTraceModule::debug_pass_sync() {} diff --git a/source/blender/draw/engines/eevee/eevee_shader.cc b/source/blender/draw/engines/eevee/eevee_shader.cc index aed5a016240..ba94756b1df 100644 --- a/source/blender/draw/engines/eevee/eevee_shader.cc +++ b/source/blender/draw/engines/eevee/eevee_shader.cc @@ -139,14 +139,24 @@ bool ShaderModule::request_specializations(bool block_until_ready, Vector specializations; for (int i = 0; i < 3; i++) { GPUShader *sh = static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i)); + int render_pass_shadow_id_index = GPU_shader_get_constant(sh, "render_pass_shadow_id"); + int use_split_indirect_index = GPU_shader_get_constant(sh, "use_split_indirect"); + int use_lightprobe_eval_index = GPU_shader_get_constant(sh, "use_lightprobe_eval"); + int use_transmission_index = GPU_shader_get_constant(sh, "use_transmission"); + int shadow_ray_count_index = GPU_shader_get_constant(sh, "shadow_ray_count"); + int shadow_ray_step_count_index = GPU_shader_get_constant(sh, "shadow_ray_step_count"); + + gpu::shader::SpecializationConstants sp = GPU_shader_get_default_constant_state(sh); + for (bool use_transmission : {false, true}) { - specializations.append({sh, - {{"render_pass_shadow_id", render_buffers_shadow_id}, - {"use_split_indirect", use_split_indirect}, - {"use_lightprobe_eval", use_lightprobe_eval}, - {"use_transmission", use_transmission}, - {"shadow_ray_count", shadow_ray_count}, - {"shadow_ray_step_count", shadow_ray_step_count}}}); + sp.set_value(render_pass_shadow_id_index, render_buffers_shadow_id); + sp.set_value(use_split_indirect_index, use_split_indirect); + sp.set_value(use_lightprobe_eval_index, use_lightprobe_eval); + sp.set_value(use_transmission_index, use_transmission); + sp.set_value(shadow_ray_count_index, shadow_ray_count); + sp.set_value(shadow_ray_step_count_index, shadow_ray_step_count); + + specializations.append({sh, sp}); } } diff --git a/source/blender/draw/intern/draw_command.cc b/source/blender/draw/intern/draw_command.cc index a835fe7d8e0..50f099465c2 100644 --- a/source/blender/draw/intern/draw_command.cc +++ b/source/blender/draw/intern/draw_command.cc @@ -45,9 +45,13 @@ static gpu::Batch *procedural_batch_get(GPUPrimType primitive) void ShaderBind::execute(RecordingState &state) const { - if (assign_if_different(state.shader, shader)) { - GPU_shader_bind(shader); + state.shader_use_specialization = !GPU_shader_get_default_constant_state(shader).is_empty(); + if (assign_if_different(state.shader, shader) || state.shader_use_specialization) { + GPU_shader_bind(shader, state.specialization_constants_get()); } + /* Signal that we can reload the default for a different specialization later on. + * However, we keep the specialization_constants state around for compute shaders. */ + state.specialization_constants_in_use = false; } void FramebufferBind::execute() const @@ -129,35 +133,40 @@ void PushConstant::execute(RecordingState &state) const } } -void SpecializeConstant::execute() const +void SpecializeConstant::execute(command::RecordingState &state) const { /* All specialization constants should exist as they are not optimized out like uniforms. */ BLI_assert(location != -1); + if (state.specialization_constants_in_use == false) { + state.specialization_constants = GPU_shader_get_default_constant_state(this->shader); + state.specialization_constants_in_use = true; + } + switch (type) { case SpecializeConstant::Type::IntValue: - GPU_shader_constant_int_ex(shader, location, int_value); + state.specialization_constants.set_value(location, int_value); break; case SpecializeConstant::Type::IntReference: - GPU_shader_constant_int_ex(shader, location, *int_ref); + state.specialization_constants.set_value(location, *int_ref); break; case SpecializeConstant::Type::UintValue: - GPU_shader_constant_uint_ex(shader, location, uint_value); + state.specialization_constants.set_value(location, uint_value); break; case SpecializeConstant::Type::UintReference: - GPU_shader_constant_uint_ex(shader, location, *uint_ref); + state.specialization_constants.set_value(location, *uint_ref); break; case SpecializeConstant::Type::FloatValue: - GPU_shader_constant_float_ex(shader, location, float_value); + state.specialization_constants.set_value(location, float_value); break; case SpecializeConstant::Type::FloatReference: - GPU_shader_constant_float_ex(shader, location, *float_ref); + state.specialization_constants.set_value(location, *float_ref); break; case SpecializeConstant::Type::BoolValue: - GPU_shader_constant_bool_ex(shader, location, bool_value); + state.specialization_constants.set_value(location, bool_value); break; case SpecializeConstant::Type::BoolReference: - GPU_shader_constant_bool_ex(shader, location, *bool_ref); + state.specialization_constants.set_value(location, *bool_ref); break; } } @@ -177,6 +186,8 @@ void Draw::execute(RecordingState &state) const state.instance_offset += instance_len; } + GPU_shader_get_default_constant_state(state.shader).is_empty(); + if (is_primitive_expansion()) { /* Expanded draw-call. */ IndexRange expanded_range = GPU_batch_draw_expanded_parameter_get( @@ -194,13 +205,13 @@ void Draw::execute(RecordingState &state) const GPU_batch_bind_as_resources(batch, state.shader); gpu::Batch *gpu_batch = procedural_batch_get(GPUPrimType(expand_prim_type)); - GPU_batch_set_shader(gpu_batch, state.shader); + GPU_batch_set_shader(gpu_batch, state.shader, state.specialization_constants_get()); GPU_batch_draw_advanced( gpu_batch, expanded_range.start(), expanded_range.size(), instance_first, instance_len); } else { /* Regular draw-call. */ - GPU_batch_set_shader(batch, state.shader); + GPU_batch_set_shader(batch, state.shader, state.specialization_constants_get()); GPU_batch_draw_advanced(batch, vertex_first, vertex_len, instance_first, instance_len); } } @@ -227,7 +238,7 @@ void DrawMulti::execute(RecordingState &state) const GPU_batch_resource_id_buf_set(batch, state.resource_id_buf); } - GPU_batch_set_shader(batch, state.shader); + GPU_batch_set_shader(batch, state.shader, state.specialization_constants_get()); constexpr intptr_t stride = sizeof(DrawCommand); /* We have 2 indirect command reserved per draw group. */ @@ -259,16 +270,18 @@ void DrawIndirect::execute(RecordingState &state) const void Dispatch::execute(RecordingState &state) const { if (is_reference) { - GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z); + GPU_compute_dispatch( + state.shader, size_ref->x, size_ref->y, size_ref->z, state.specialization_constants_get()); } else { - GPU_compute_dispatch(state.shader, size.x, size.y, size.z); + GPU_compute_dispatch( + state.shader, size.x, size.y, size.z, state.specialization_constants_get()); } } void DispatchIndirect::execute(RecordingState &state) const { - GPU_compute_dispatch_indirect(state.shader, *indirect_buf); + GPU_compute_dispatch_indirect(state.shader, *indirect_buf, state.specialization_constants_get()); } void Barrier::execute() const diff --git a/source/blender/draw/intern/draw_command.hh b/source/blender/draw/intern/draw_command.hh index 0e3ecd8d87a..25844c3beb3 100644 --- a/source/blender/draw/intern/draw_command.hh +++ b/source/blender/draw/intern/draw_command.hh @@ -44,6 +44,11 @@ class DrawMultiBuf; * Keep track of several states and avoid redundant state changes. */ struct RecordingState { + gpu::shader::SpecializationConstants specialization_constants; + /* True if specialization_constants was set. */ + bool specialization_constants_in_use = false; + /* True if the bound shader uses specialization. */ + bool shader_use_specialization = false; GPUShader *shader = nullptr; bool front_facing = true; bool inverted_view = false; @@ -77,6 +82,11 @@ struct RecordingState { GPU_uniformbuf_debug_unbind_all(); } } + + const gpu::shader::SpecializationConstants *specialization_constants_get() + { + return shader_use_specialization ? &specialization_constants : nullptr; + } }; /** \} */ @@ -348,7 +358,7 @@ struct SpecializeConstant { SpecializeConstant(GPUShader *sh, int loc, const bool *val) : shader(sh), bool_ref(val), location(loc), type(Type::BoolReference){}; - void execute() const; + void execute(RecordingState &state) const; std::string serialize() const; }; diff --git a/source/blender/draw/intern/draw_manager.cc b/source/blender/draw/intern/draw_manager.cc index b67196f69f0..ccbffe91586 100644 --- a/source/blender/draw/intern/draw_manager.cc +++ b/source/blender/draw/intern/draw_manager.cc @@ -251,6 +251,24 @@ void Manager::generate_commands(PassSimple &pass) pass.draw_commands_buf_.generate_commands(pass.headers_, pass.commands_, pass.sub_passes_); } +void Manager::warm_shader_specialization(PassMain &pass) +{ + if (pass.is_empty()) { + return; + } + command::RecordingState state; + pass.warm_shader_specialization(state); +} + +void Manager::warm_shader_specialization(PassSimple &pass) +{ + if (pass.is_empty()) { + return; + } + command::RecordingState state; + pass.warm_shader_specialization(state); +} + void Manager::submit_only(PassMain &pass, View &view) { if (pass.is_empty()) { diff --git a/source/blender/draw/intern/draw_manager.hh b/source/blender/draw/intern/draw_manager.hh index 45c4ef501f1..c995028c09b 100644 --- a/source/blender/draw/intern/draw_manager.hh +++ b/source/blender/draw/intern/draw_manager.hh @@ -246,6 +246,13 @@ class Manager { */ void generate_commands(PassSimple &pass); + /** + * Make sure the shader specialization constants are already compiled. + * This avoid stalling the real submission call because of specialization. + */ + void warm_shader_specialization(PassMain &pass); + void warm_shader_specialization(PassSimple &pass); + /** * Submit a pass for drawing. All resource reference will be dereferenced and commands will be * sent to GPU. Visibility and command generation **must** have already been done explicitly diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh index 345b41c64c5..eb1517cb1d5 100644 --- a/source/blender/draw/intern/draw_pass.hh +++ b/source/blender/draw/intern/draw_pass.hh @@ -467,6 +467,12 @@ class PassBase { */ command::Undetermined &create_command(command::Type type); + /** + * Make sure the shader specialization constants are already compiled. + * This avoid stalling the real submission call because of specialization. + */ + void warm_shader_specialization(command::RecordingState &state) const; + void submit(command::RecordingState &state) const; bool has_generated_commands() const @@ -679,6 +685,59 @@ template inline PassBase &PassBase::sub(const char *name) return sub_passes_[index]; } +template +void PassBase::warm_shader_specialization(command::RecordingState &state) const +{ + GPU_debug_group_begin("warm_shader_specialization"); + + for (const command::Header &header : headers_) { + switch (header.type) { + default: + case Type::None: + break; + case Type::SubPass: + sub_passes_[header.index].warm_shader_specialization(state); + break; + case command::Type::FramebufferBind: + break; + case command::Type::SubPassTransition: + break; + case command::Type::ShaderBind: + commands_[header.index].shader_bind.execute(state); + break; + case command::Type::ResourceBind: + break; + case command::Type::PushConstant: + break; + case command::Type::SpecializeConstant: + commands_[header.index].specialize_constant.execute(state); + break; + case command::Type::Draw: + break; + case command::Type::DrawMulti: + break; + case command::Type::DrawIndirect: + break; + case command::Type::Dispatch: + break; + case command::Type::DispatchIndirect: + break; + case command::Type::Barrier: + break; + case command::Type::Clear: + break; + case command::Type::ClearMulti: + break; + case command::Type::StateSet: + break; + case command::Type::StencilSet: + break; + } + } + + GPU_debug_group_end(); +} + template void PassBase::submit(command::RecordingState &state) const { if (headers_.is_empty()) { @@ -711,7 +770,7 @@ template void PassBase::submit(command::RecordingState &state) const commands_[header.index].push_constant.execute(state); break; case command::Type::SpecializeConstant: - commands_[header.index].specialize_constant.execute(); + commands_[header.index].specialize_constant.execute(state); break; case command::Type::Draw: commands_[header.index].draw.execute(state); diff --git a/source/blender/gpu/GPU_batch.hh b/source/blender/gpu/GPU_batch.hh index e7c9f942c7c..b0d56c3d5d7 100644 --- a/source/blender/gpu/GPU_batch.hh +++ b/source/blender/gpu/GPU_batch.hh @@ -276,7 +276,10 @@ void GPU_batch_resource_id_buf_set(blender::gpu::Batch *batch, GPUStorageBuf *re * \note This need to be called first for the `GPU_batch_uniform_*` functions to work. */ /* TODO(fclem): These should be removed and replaced by `GPU_shader_bind()`. */ -void GPU_batch_set_shader(blender::gpu::Batch *batch, GPUShader *shader); +void GPU_batch_set_shader( + blender::gpu::Batch *batch, + GPUShader *shader, + const blender::gpu::shader::SpecializationConstants *constants_state = nullptr); void GPU_batch_program_set_builtin(blender::gpu::Batch *batch, eGPUBuiltinShader shader_id); void GPU_batch_program_set_builtin_with_config(blender::gpu::Batch *batch, eGPUBuiltinShader shader_id, diff --git a/source/blender/gpu/GPU_common_types.hh b/source/blender/gpu/GPU_common_types.hh index c0e82d107bf..c59ce15ee7d 100644 --- a/source/blender/gpu/GPU_common_types.hh +++ b/source/blender/gpu/GPU_common_types.hh @@ -9,6 +9,7 @@ #pragma once #include "BLI_string_ref.hh" +#include "BLI_vector.hh" /** * Describes the load operation of a frame-buffer attachment at the start of a render pass. @@ -220,4 +221,44 @@ struct SpecializationConstant { } }; +/** + * Specialization constants as a Struct-of-Arrays. Allow simpler comparison and reset. + * The backend is free to implement their support as they see fit. + */ +struct SpecializationConstants { + Vector types; + /* Current values set by `GPU_shader_constant_*()` call. The backend can choose to interpret + * that however it wants (i.e: bind another shader instead). */ + Vector values; + + void set_value(int index, uint32_t value) + { + BLI_assert_msg(types[index] == Type::uint_t, "Mismatch between interface and constant type"); + values[index].u = value; + } + + void set_value(int index, int value) + { + BLI_assert_msg(types[index] == Type::int_t, "Mismatch between interface and constant type"); + values[index].i = value; + } + + void set_value(int index, float value) + { + BLI_assert_msg(types[index] == Type::float_t, "Mismatch between interface and constant type"); + values[index].f = value; + } + + void set_value(int index, bool value) + { + BLI_assert_msg(types[index] == Type::bool_t, "Mismatch between interface and constant type"); + values[index].u = value ? 1 : 0; + } + + bool is_empty() const + { + return types.is_empty(); + } +}; + } // namespace blender::gpu::shader diff --git a/source/blender/gpu/GPU_compute.hh b/source/blender/gpu/GPU_compute.hh index 3e6c3104eb5..c8bdb001528 100644 --- a/source/blender/gpu/GPU_compute.hh +++ b/source/blender/gpu/GPU_compute.hh @@ -23,10 +23,12 @@ * The number of work groups (aka thread groups) is bounded by `GPU_max_work_group_count()` which * might be different in each of the 3 dimensions. */ -void GPU_compute_dispatch(GPUShader *shader, - uint groups_x_len, - uint groups_y_len, - uint groups_z_len); +void GPU_compute_dispatch( + GPUShader *shader, + uint groups_x_len, + uint groups_y_len, + uint groups_z_len, + const blender::gpu::shader::SpecializationConstants *constants_state = nullptr); /** * Dispatch a compute shader task. The size of the dispatch is sourced from a \a indirect_buf @@ -45,4 +47,7 @@ void GPU_compute_dispatch(GPUShader *shader, * The number of work groups (aka thread groups) is bounded by `GPU_max_work_group_count()` which * might be different in each of the 3 dimensions. */ -void GPU_compute_dispatch_indirect(GPUShader *shader, GPUStorageBuf *indirect_buf); +void GPU_compute_dispatch_indirect( + GPUShader *shader, + GPUStorageBuf *indirect_buf, + const blender::gpu::shader::SpecializationConstants *constants_state = nullptr); diff --git a/source/blender/gpu/GPU_shader.hh b/source/blender/gpu/GPU_shader.hh index 52bc60c15da..82fb1f9ca52 100644 --- a/source/blender/gpu/GPU_shader.hh +++ b/source/blender/gpu/GPU_shader.hh @@ -127,7 +127,9 @@ void GPU_shader_free(GPUShader *shader); * Uniform functions need to have the shader bound in order to work. (TODO: until we use * glProgramUniform) */ -void GPU_shader_bind(GPUShader *shader); +void GPU_shader_bind( + GPUShader *shader, + const blender::gpu::shader::SpecializationConstants *constants_state = nullptr); /** * Unbind the active shader. @@ -234,21 +236,16 @@ bool GPU_shader_get_ssbo_input_info(const GPUShader *shader, int ssbo_location, * Otherwise, it will produce undefined behavior. * \{ */ -void GPU_shader_constant_int_ex(GPUShader *sh, int location, int value); -void GPU_shader_constant_uint_ex(GPUShader *sh, int location, unsigned int value); -void GPU_shader_constant_float_ex(GPUShader *sh, int location, float value); -void GPU_shader_constant_bool_ex(GPUShader *sh, int location, bool value); - -void GPU_shader_constant_int(GPUShader *sh, const char *name, int value); -void GPU_shader_constant_uint(GPUShader *sh, const char *name, unsigned int value); -void GPU_shader_constant_float(GPUShader *sh, const char *name, float value); -void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value); +/* Return the default constants. + * All constants available for this shader should fit the returned structure. */ +const blender::gpu::shader::SpecializationConstants &GPU_shader_get_default_constant_state( + GPUShader *sh); using SpecializationBatchHandle = int64_t; struct ShaderSpecialization { GPUShader *shader; - blender::Vector constants; + blender::gpu::shader::SpecializationConstants constants; }; /** diff --git a/source/blender/gpu/intern/gpu_batch.cc b/source/blender/gpu/intern/gpu_batch.cc index e28529538e5..581a440d0b0 100644 --- a/source/blender/gpu/intern/gpu_batch.cc +++ b/source/blender/gpu/intern/gpu_batch.cc @@ -223,10 +223,12 @@ void GPU_batch_resource_id_buf_set(Batch *batch, GPUStorageBuf *resource_id_buf) * * \{ */ -void GPU_batch_set_shader(Batch *batch, GPUShader *shader) +void GPU_batch_set_shader(Batch *batch, + GPUShader *shader, + const shader::SpecializationConstants *constants_state) { batch->shader = shader; - GPU_shader_bind(batch->shader); + GPU_shader_bind(batch->shader, constants_state); } static uint16_t bind_attribute_as_ssbo(const ShaderInterface *interface, diff --git a/source/blender/gpu/intern/gpu_compute.cc b/source/blender/gpu/intern/gpu_compute.cc index adad76079d5..d9a8be92ef3 100644 --- a/source/blender/gpu/intern/gpu_compute.cc +++ b/source/blender/gpu/intern/gpu_compute.cc @@ -13,19 +13,23 @@ void GPU_compute_dispatch(GPUShader *shader, uint groups_x_len, uint groups_y_len, - uint groups_z_len) + uint groups_z_len, + const blender::gpu::shader::SpecializationConstants *constants_state) { blender::gpu::GPUBackend &gpu_backend = *blender::gpu::GPUBackend::get(); - GPU_shader_bind(shader); + GPU_shader_bind(shader, constants_state); gpu_backend.compute_dispatch(groups_x_len, groups_y_len, groups_z_len); } -void GPU_compute_dispatch_indirect(GPUShader *shader, GPUStorageBuf *indirect_buf_) +void GPU_compute_dispatch_indirect( + GPUShader *shader, + GPUStorageBuf *indirect_buf_, + const blender::gpu::shader::SpecializationConstants *constants_state) { blender::gpu::GPUBackend &gpu_backend = *blender::gpu::GPUBackend::get(); blender::gpu::StorageBuf *indirect_buf = reinterpret_cast( indirect_buf_); - GPU_shader_bind(shader); + GPU_shader_bind(shader, constants_state); gpu_backend.compute_dispatch_indirect(indirect_buf); } diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index 62d03b785ac..bfd05a7a724 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -402,23 +402,24 @@ void GPU_shader_cache_dir_clear_old() /** \name Binding * \{ */ -void GPU_shader_bind(GPUShader *gpu_shader) +void GPU_shader_bind(GPUShader *gpu_shader, const shader::SpecializationConstants *constants_state) { Shader *shader = unwrap(gpu_shader); + BLI_assert_msg(constants_state != nullptr || shader->constants->is_empty(), + "Shader requires specialization constants but none was passed"); + Context *ctx = Context::get(); if (ctx->shader != shader) { ctx->shader = shader; - shader->bind(); + shader->bind(constants_state); GPU_matrix_bind(gpu_shader); Shader::set_srgb_uniform(ctx, gpu_shader); - shader->constants.is_dirty = false; } else { - if (shader->constants.is_dirty) { - shader->bind(); - shader->constants.is_dirty = false; + if (constants_state) { + shader->bind(constants_state); } if (ctx->shader_builtin_srgb_is_dirty) { Shader::set_srgb_uniform(ctx, gpu_shader); @@ -493,57 +494,20 @@ void GPU_shader_warm_cache(GPUShader *shader, int limit) /** \name Assign specialization constants. * \{ */ +const shader::SpecializationConstants &GPU_shader_get_default_constant_state(GPUShader *sh) +{ + return *unwrap(sh)->constants; +} + void Shader::specialization_constants_init(const shader::ShaderCreateInfo &info) { using namespace shader; + shader::SpecializationConstants constants_tmp; for (const SpecializationConstant &sc : info.specialization_constants_) { - constants.types.append(sc.type); - constants.values.append(sc.value); + constants_tmp.types.append(sc.type); + constants_tmp.values.append(sc.value); } - constants.is_dirty = true; -} - -void GPU_shader_constant_int_ex(GPUShader *sh, int location, int value) -{ - Shader &shader = *unwrap(sh); - BLI_assert(shader.constants.types[location] == gpu::shader::Type::int_t); - shader.constants.is_dirty |= assign_if_different(shader.constants.values[location].i, value); -} -void GPU_shader_constant_uint_ex(GPUShader *sh, int location, uint value) -{ - Shader &shader = *unwrap(sh); - BLI_assert(shader.constants.types[location] == gpu::shader::Type::uint_t); - shader.constants.is_dirty |= assign_if_different(shader.constants.values[location].u, value); -} -void GPU_shader_constant_float_ex(GPUShader *sh, int location, float value) -{ - Shader &shader = *unwrap(sh); - BLI_assert(shader.constants.types[location] == gpu::shader::Type::float_t); - shader.constants.is_dirty |= assign_if_different(shader.constants.values[location].f, value); -} -void GPU_shader_constant_bool_ex(GPUShader *sh, int location, bool value) -{ - Shader &shader = *unwrap(sh); - BLI_assert(shader.constants.types[location] == gpu::shader::Type::bool_t); - shader.constants.is_dirty |= assign_if_different(shader.constants.values[location].u, - uint32_t(value)); -} - -void GPU_shader_constant_int(GPUShader *sh, const char *name, int value) -{ - GPU_shader_constant_int_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value); -} -void GPU_shader_constant_uint(GPUShader *sh, const char *name, uint value) -{ - GPU_shader_constant_uint_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value); -} -void GPU_shader_constant_float(GPUShader *sh, const char *name, float value) -{ - GPU_shader_constant_float_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value); -} -void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value) -{ - GPU_shader_constant_bool_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value); + constants = std::make_unique(std::move(constants_tmp)); } SpecializationBatchHandle GPU_shader_batch_specializations( @@ -841,8 +805,10 @@ Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_ba } Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str()); - shader->init(info, is_batch_compilation); + /* Needs to be called before init as GL uses the default specialization constants state to insert + * default shader inside a map. */ shader->specialization_constants_init(info); + shader->init(info, is_batch_compilation); shader->fragment_output_bits = 0; for (const shader::ShaderCreateInfo::FragOut &frag_out : info.fragment_outputs_) { diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh index 9a9bc1f6fcf..07ab0f1bd88 100644 --- a/source/blender/gpu/intern/gpu_shader_private.hh +++ b/source/blender/gpu/intern/gpu_shader_private.hh @@ -47,24 +47,9 @@ class Shader { /** Bit-set indicating the frame-buffer color attachments that this shader writes to. */ uint16_t fragment_output_bits = 0; - /** - * Specialization constants as a Struct-of-Arrays. Allow simpler comparison and reset. - * The backend is free to implement their support as they see fit. - */ - struct Constants { - using Value = shader::SpecializationConstant::Value; - Vector types; - /* Current values set by `GPU_shader_constant_*()` call. The backend can choose to interpret - * that however it wants (i.e: bind another shader instead). */ - Vector values; - - /** - * OpenGL needs to know if a different program needs to be attached when constants are - * changed. Vulkan and Metal uses pipelines and don't have this issue. Attribute can be - * removed after the OpenGL backend has been phased out. - */ - bool is_dirty; - } constants; + /* Default specialization constants state as defined inside ShaderCreateInfo. + * Should be considered as const after init(). */ + std::unique_ptr constants; /* WORKAROUND: True if this shader is a polyline shader and needs an appropriate setup to render. * Eventually, in the future, we should modify the user code instead of relying on such hacks. */ @@ -98,7 +83,7 @@ class Shader { * See `GPU_shader_warm_cache(..)` in `GPU_shader.hh` for more information. */ virtual void warm_cache(int limit) = 0; - virtual void bind() = 0; + virtual void bind(const shader::SpecializationConstants *constants_state) = 0; virtual void unbind() = 0; virtual void uniform_float(int location, int comp_len, int array_size, const float *data) = 0; diff --git a/source/blender/gpu/metal/mtl_batch.hh b/source/blender/gpu/metal/mtl_batch.hh index a9d0cce85d9..99020603933 100644 --- a/source/blender/gpu/metal/mtl_batch.hh +++ b/source/blender/gpu/metal/mtl_batch.hh @@ -68,7 +68,6 @@ class MTLBatch : public Batch { private: MTLShader *active_shader_ = nullptr; - bool shader_in_use_ = false; MTLVertexDescriptorCache vao_cache = {this}; /* Topology emulation. */ @@ -115,7 +114,6 @@ class MTLBatch : public Batch { } private: - void shader_bind(); void draw_advanced(int v_first, int v_count, int i_first, int i_count); void draw_advanced_indirect(GPUStorageBuf *indirect_buf, intptr_t offset); int prepare_vertex_binding(MTLVertBuf *verts, diff --git a/source/blender/gpu/metal/mtl_batch.mm b/source/blender/gpu/metal/mtl_batch.mm index 40ba02020e7..c06887694e6 100644 --- a/source/blender/gpu/metal/mtl_batch.mm +++ b/source/blender/gpu/metal/mtl_batch.mm @@ -34,28 +34,14 @@ namespace blender::gpu { * \{ */ void MTLBatch::draw(int v_first, int v_count, int i_first, int i_count) { - if (this->flag & GPU_BATCH_INVALID) { - this->shader_in_use_ = false; - } this->draw_advanced(v_first, v_count, i_first, i_count); } void MTLBatch::draw_indirect(GPUStorageBuf *indirect_buf, intptr_t offset) { - if (this->flag & GPU_BATCH_INVALID) { - this->shader_in_use_ = false; - } this->draw_advanced_indirect(indirect_buf, offset); } -void MTLBatch::shader_bind() -{ - if (active_shader_ && active_shader_->is_valid()) { - active_shader_->bind(); - shader_in_use_ = true; - } -} - void MTLBatch::MTLVertexDescriptorCache::vertex_descriptor_cache_init(MTLContext *ctx) { BLI_assert(ctx != nullptr); @@ -417,9 +403,6 @@ id MTLBatch::bind() /* Debug Check: Ensure Frame-buffer instance is not dirty. */ BLI_assert(!ctx->main_command_buffer.get_active_framebuffer()->get_dirty()); - /* Bind Shader. */ - this->shader_bind(); - /* GPU debug markers. */ if (G.debug & G_DEBUG_GPU) { [rec pushDebugGroup:[NSString stringWithFormat:@"Draw Commands%@ (GPUShader: %s)", diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh index df5e1c99867..958ab493459 100644 --- a/source/blender/gpu/metal/mtl_context.hh +++ b/source/blender/gpu/metal/mtl_context.hh @@ -838,6 +838,11 @@ class MTLContext : public Context { id get_sampler_from_state(MTLSamplerState state); id get_default_sampler_state(); + /* Active shader specialization constants state. */ + shader::SpecializationConstants constants_state; + + void specialization_constants_set(const shader::SpecializationConstants *constants_state); + /* Metal Context pipeline state. */ void pipeline_state_init(); MTLShader *get_active_shader(); diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index 9b2476b5814..e3c9bed7fad 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -709,6 +709,13 @@ void MTLContext::free_dummy_resources() } } +void MTLContext::specialization_constants_set( + const shader::SpecializationConstants *constants_state) +{ + this->constants_state = (constants_state != nullptr) ? *constants_state : + shader::SpecializationConstants{}; +} + /** \} */ /* -------------------------------------------------------------------- */ @@ -2185,11 +2192,10 @@ const MTLComputePipelineStateInstance *MTLContext::ensure_compute_pipeline_state MTLShader *active_shader = this->pipeline_state.active_shader; /* Set descriptor to default shader constants . */ - MTLComputePipelineStateDescriptor compute_pipeline_descriptor(active_shader->constants.values); + MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants_state.values); const MTLComputePipelineStateInstance *compute_pso_inst = - this->pipeline_state.active_shader->bake_compute_pipeline_state(this, - compute_pipeline_descriptor); + active_shader->bake_compute_pipeline_state(this, compute_pipeline_descriptor); if (compute_pso_inst == nullptr || compute_pso_inst->pso == nil) { MTL_LOG_WARNING("No valid compute PSO for compute dispatch!", ); diff --git a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh index a95cd84854a..1fd9d75e6ca 100644 --- a/source/blender/gpu/metal/mtl_pso_descriptor_state.hh +++ b/source/blender/gpu/metal/mtl_pso_descriptor_state.hh @@ -173,10 +173,13 @@ struct MTLVertexDescriptor { }; struct SpecializationStateDescriptor { - Vector values; + Vector values; SpecializationStateDescriptor() = default; - SpecializationStateDescriptor(Vector source) : values(source) {} + SpecializationStateDescriptor(Vector source) + : values(source) + { + } bool operator==(const SpecializationStateDescriptor &other) const { @@ -187,7 +190,7 @@ struct SpecializationStateDescriptor { { uint64_t hash = values.size(); uint seed = 0xFF; - for (const Shader::Constants::Value &value : values) { + for (const shader::SpecializationConstant::Value &value : values) { seed = seed << 1; hash ^= seed ^ value.u; } @@ -343,7 +346,7 @@ struct MTLComputePipelineStateDescriptor { SpecializationStateDescriptor specialization_state; MTLComputePipelineStateDescriptor() = default; - MTLComputePipelineStateDescriptor(Vector values) + MTLComputePipelineStateDescriptor(Vector values) { specialization_state.values = values; } diff --git a/source/blender/gpu/metal/mtl_shader.hh b/source/blender/gpu/metal/mtl_shader.hh index 26e39f13165..9981c188931 100644 --- a/source/blender/gpu/metal/mtl_shader.hh +++ b/source/blender/gpu/metal/mtl_shader.hh @@ -282,7 +282,7 @@ class MTLShader : public Shader { std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override; std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override; - void bind() override; + void bind(const shader::SpecializationConstants *constants_state) override; void unbind() override; void uniform_float(int location, int comp_len, int array_size, const float *data) override; diff --git a/source/blender/gpu/metal/mtl_shader.mm b/source/blender/gpu/metal/mtl_shader.mm index b638c38c2ee..5c1bed55b60 100644 --- a/source/blender/gpu/metal/mtl_shader.mm +++ b/source/blender/gpu/metal/mtl_shader.mm @@ -452,7 +452,7 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) * NOTE: This will compile the base unspecialized variant. */ if (is_compute) { /* Set descriptor to default shader constants */ - MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants.values); + MTLComputePipelineStateDescriptor compute_pipeline_descriptor(this->constants->values); this->bake_compute_pipeline_state(context_, compute_pipeline_descriptor); } @@ -470,9 +470,12 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info) /** \name Shader Binding. * \{ */ -void MTLShader::bind() +void MTLShader::bind(const shader::SpecializationConstants *constants_state) { MTLContext *ctx = MTLContext::get(); + /* Copy constants state. */ + ctx->specialization_constants_set(constants_state); + if (interface == nullptr || !this->is_valid()) { MTL_LOG_WARNING( "MTLShader::bind - Shader '%s' has no valid implementation in Metal, draw calls will be " @@ -763,11 +766,11 @@ void MTLShader::set_interface(MTLShaderInterface *interface) */ static void populate_specialization_constant_values( MTLFunctionConstantValues *values, - const Shader::Constants &shader_constants, + const shader::SpecializationConstants &shader_constants, const SpecializationStateDescriptor &specialization_descriptor) { for (auto i : shader_constants.types.index_range()) { - const Shader::Constants::Value &value = specialization_descriptor.values[i]; + const shader::SpecializationConstant::Value &value = specialization_descriptor.values[i]; uint index = i + MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID; switch (shader_constants.types[i]) { @@ -884,7 +887,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state( (requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified; /* Specialization configuration. */ - pipeline_descriptor.specialization_state = {this->constants.values}; + pipeline_descriptor.specialization_state = {ctx->constants_state.values}; /* Bake pipeline state using global descriptor. */ return bake_pipeline_state(ctx, prim_type, pipeline_descriptor); @@ -929,7 +932,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state( /* Custom function constant values: */ populate_specialization_constant_values( - values, this->constants, pipeline_descriptor.specialization_state); + values, *this->constants, pipeline_descriptor.specialization_state); /* Prepare Vertex descriptor based on current pipeline vertex binding state. */ MTLRenderPipelineDescriptor *desc = pso_descriptor_; @@ -1380,7 +1383,7 @@ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state( /* Check if current PSO exists in the cache. */ pso_cache_lock_.lock(); - MTLComputePipelineStateInstance **pso_lookup = compute_pso_cache_.lookup_ptr( + MTLComputePipelineStateInstance *const *pso_lookup = compute_pso_cache_.lookup_ptr( compute_pipeline_descriptor); MTLComputePipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr; pso_cache_lock_.unlock(); @@ -1401,7 +1404,7 @@ MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state( /* Custom function constant values: */ populate_specialization_constant_values( - values, this->constants, compute_pipeline_descriptor.specialization_state); + values, *this->constants, compute_pipeline_descriptor.specialization_state); /* Offset the bind index for Uniform buffers such that they begin after the VBO * buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function @@ -1576,17 +1579,8 @@ void MTLShaderCompiler::specialize_shader(ShaderSpecialization &specialization) return; } - Vector specialization_values(shader->interface->constant_len_); - - for (const SpecializationConstant &constant : specialization.constants) { - const ShaderInput *input = shader->interface->constant_get(constant.name.c_str()); - BLI_assert_msg(input != nullptr, "The specialization constant doesn't exists"); - specialization_values[input->location].u = constant.value.u; - } - shader->constants.is_dirty = true; - /* Create descriptor using these specialization constants. */ - MTLComputePipelineStateDescriptor compute_pipeline_descriptor(specialization_values); + MTLComputePipelineStateDescriptor compute_pipeline_descriptor(specialization.constants.values); MTLContext *metal_context = static_cast(Context::get()); shader->bake_compute_pipeline_state(metal_context, compute_pipeline_descriptor); diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index 57be6016021..f7feeea6937 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -71,6 +71,14 @@ void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat for (const SpecializationConstant &constant : info.specialization_constants_) { specialization_constant_names_.append(constant.name.c_str()); } + + /* NOTE: This is not threadsafe with regards to the specialization constants state access. + * The shader creation must be externally synchronized. */ + main_program_ = &program_cache_.lookup_or_add_default(constants->values); + if (!main_program_->program_id) { + main_program_->program_id = glCreateProgram(); + debug::object_label(GL_PROGRAM, main_program_->program_id, name); + } } /** \} */ @@ -622,15 +630,16 @@ std::string GLShader::resources_declare(const ShaderCreateInfo &info) const return ss.str(); } -std::string GLShader::constants_declare() const +std::string GLShader::constants_declare( + const shader::SpecializationConstants &constants_state) const { std::stringstream ss; ss << "/* Specialization Constants. */\n"; - for (int constant_index : IndexRange(constants.types.size())) { + for (int constant_index : IndexRange(constants_state.types.size())) { const StringRefNull name = specialization_constant_names_[constant_index]; - gpu::shader::Type constant_type = constants.types[constant_index]; - const SpecializationConstant::Value &value = constants.values[constant_index]; + gpu::shader::Type constant_type = constants_state.types[constant_index]; + const SpecializationConstant::Value &value = constants_state.values[constant_index]; switch (constant_type) { case Type::int_t: @@ -1189,14 +1198,14 @@ StringRefNull GLShader::glsl_patch_get(GLenum gl_stage) GLuint GLShader::create_shader_stage(GLenum gl_stage, MutableSpan sources, - GLSources &gl_sources) + GLSources &gl_sources, + const shader::SpecializationConstants &constants_state) { /* Patch the shader sources to include specialization constants. */ std::string constants_source; Vector recreated_sources; - const bool has_specialization_constants = !constants.types.is_empty(); - if (has_specialization_constants) { - constants_source = constants_declare(); + if (has_specialization_constants()) { + constants_source = constants_declare(constants_state); if (sources.is_empty()) { recreated_sources = gl_sources.sources_get(); sources = recreated_sources; @@ -1291,40 +1300,38 @@ GLuint GLShader::create_shader_stage(GLenum gl_stage, void GLShader::update_program_and_sources(GLSources &stage_sources, MutableSpan sources) { - const bool store_sources = !constants.types.is_empty() || async_compilation_; + const bool store_sources = has_specialization_constants() || async_compilation_; if (store_sources && stage_sources.is_empty()) { stage_sources = sources; } - - init_program(); } void GLShader::vertex_shader_from_glsl(MutableSpan sources) { update_program_and_sources(vertex_sources_, sources); - program_active_->vert_shader = this->create_shader_stage( - GL_VERTEX_SHADER, sources, vertex_sources_); + main_program_->vert_shader = create_shader_stage( + GL_VERTEX_SHADER, sources, vertex_sources_, *constants); } void GLShader::geometry_shader_from_glsl(MutableSpan sources) { update_program_and_sources(geometry_sources_, sources); - program_active_->geom_shader = this->create_shader_stage( - GL_GEOMETRY_SHADER, sources, geometry_sources_); + main_program_->geom_shader = create_shader_stage( + GL_GEOMETRY_SHADER, sources, geometry_sources_, *constants); } void GLShader::fragment_shader_from_glsl(MutableSpan sources) { update_program_and_sources(fragment_sources_, sources); - program_active_->frag_shader = this->create_shader_stage( - GL_FRAGMENT_SHADER, sources, fragment_sources_); + main_program_->frag_shader = create_shader_stage( + GL_FRAGMENT_SHADER, sources, fragment_sources_, *constants); } void GLShader::compute_shader_from_glsl(MutableSpan sources) { update_program_and_sources(compute_sources_, sources); - program_active_->compute_shader = this->create_shader_stage( - GL_COMPUTE_SHADER, sources, compute_sources_); + main_program_->compute_shader = create_shader_stage( + GL_COMPUTE_SHADER, sources, compute_sources_, *constants); } bool GLShader::finalize(const shader::ShaderCreateInfo *info) @@ -1346,25 +1353,31 @@ bool GLShader::finalize(const shader::ShaderCreateInfo *info) return true; } - program_link(); + main_program_->program_link(name); return post_finalize(info); } bool GLShader::post_finalize(const shader::ShaderCreateInfo *info) { - if (!check_link_status()) { + GLuint program_id = main_program_->program_id; + GLint status; + glGetProgramiv(program_id, GL_LINK_STATUS, &status); + if (!status) { + char log[5000]; + glGetProgramInfoLog(program_id, sizeof(log), nullptr, log); + GLLogParser parser; + print_log({debug_source}, log, "Linking", true, &parser); return false; } /* Reset for specialization constants variations. */ async_compilation_ = false; - GLuint program_id = program_get(); if (info != nullptr) { - interface = new GLShaderInterface(program_id, *info); + interface = new GLShaderInterface(main_program_->program_id, *info); } else { - interface = new GLShaderInterface(program_id); + interface = new GLShaderInterface(main_program_->program_id); } return true; @@ -1376,10 +1389,10 @@ bool GLShader::post_finalize(const shader::ShaderCreateInfo *info) /** \name Binding * \{ */ -void GLShader::bind() +void GLShader::bind(const shader::SpecializationConstants *constants_state) { - GLuint program_id = program_get(); - glUseProgram(program_id); + GLProgram &program = program_get(constants_state); + glUseProgram(program.program_id); } void GLShader::unbind() @@ -1533,103 +1546,80 @@ GLShader::GLProgram::~GLProgram() glDeleteProgram(program_id); } -void GLShader::program_link() +void GLShader::GLProgram::program_link(StringRefNull shader_name) { - BLI_assert(program_active_ != nullptr); - if (program_active_->program_id == 0) { - program_active_->program_id = glCreateProgram(); - debug::object_label(GL_PROGRAM, program_active_->program_id, name); + if (this->program_id == 0) { + this->program_id = glCreateProgram(); + debug::object_label(GL_PROGRAM, this->program_id, shader_name.c_str()); } - if (async_compilation_) { - return; - } + GLuint program_id = this->program_id; - GLuint program_id = program_active_->program_id; - - if (program_active_->vert_shader) { - glAttachShader(program_id, program_active_->vert_shader); + if (this->vert_shader) { + glAttachShader(program_id, this->vert_shader); } - if (program_active_->geom_shader) { - glAttachShader(program_id, program_active_->geom_shader); + if (this->geom_shader) { + glAttachShader(program_id, this->geom_shader); } - if (program_active_->frag_shader) { - glAttachShader(program_id, program_active_->frag_shader); + if (this->frag_shader) { + glAttachShader(program_id, this->frag_shader); } - if (program_active_->compute_shader) { - glAttachShader(program_id, program_active_->compute_shader); + if (this->compute_shader) { + glAttachShader(program_id, this->compute_shader); } glLinkProgram(program_id); } -bool GLShader::check_link_status() +GLShader::GLProgram &GLShader::program_get(const shader::SpecializationConstants *constants_state) { - GLuint program_id = program_active_->program_id; - GLint status; - glGetProgramiv(program_id, GL_LINK_STATUS, &status); - if (!status) { - char log[5000]; - glGetProgramInfoLog(program_id, sizeof(log), nullptr, log); - GLLogParser parser; - print_log({debug_source}, log, "Linking", true, &parser); + BLI_assert(constants_state == nullptr || this->has_specialization_constants() == true); + + if (constants_state == nullptr) { + /* Early exit for shaders that doesn't use specialization constants. */ + BLI_assert(main_program_); + return *main_program_; } - return bool(status); -} + program_cache_mutex_.lock(); -void GLShader::init_program() -{ - if (program_active_) { - return; + GLProgram &program = program_cache_.lookup_or_add_default(constants_state->values); + + program_cache_mutex_.unlock(); + + /* Avoid two threads trying to specialize the same shader at the same time. */ + std::scoped_lock lock(program.compilation_mutex); + + if (program.program_id != 0) { + /* Specialization is already compiled. */ + return program; } - program_active_ = &program_cache_.lookup_or_add_default(constants.values); - if (!program_active_->program_id) { - program_active_->program_id = glCreateProgram(); - debug::object_label(GL_PROGRAM, program_active_->program_id, name); + if (!vertex_sources_.is_empty()) { + program.vert_shader = create_shader_stage( + GL_VERTEX_SHADER, {}, vertex_sources_, *constants_state); } -} - -GLuint GLShader::program_get() -{ - if (constants.types.is_empty()) { - /* Early exit for shaders that doesn't use specialization constants. The active shader should - * already be setup. */ - BLI_assert(program_active_ && program_active_->program_id); - return program_active_->program_id; + if (!geometry_sources_.is_empty()) { + program.geom_shader = create_shader_stage( + GL_GEOMETRY_SHADER, {}, geometry_sources_, *constants_state); + } + if (!fragment_sources_.is_empty()) { + program.frag_shader = create_shader_stage( + GL_FRAGMENT_SHADER, {}, fragment_sources_, *constants_state); + } + if (!compute_sources_.is_empty()) { + program.compute_shader = create_shader_stage( + GL_COMPUTE_SHADER, {}, compute_sources_, *constants_state); } - if (!constants.is_dirty) { - /* Early exit when constants didn't change since the last call. */ - BLI_assert(program_active_ && program_active_->program_id); - return program_active_->program_id; + if (async_compilation_) { + program.program_id = glCreateProgram(); + debug::object_label(GL_PROGRAM, program.program_id, name); + return program; } - program_active_ = &program_cache_.lookup_or_add_default(constants.values); - if (!program_active_->program_id) { - MutableSpan no_sources; - if (!vertex_sources_.is_empty()) { - program_active_->vert_shader = create_shader_stage( - GL_VERTEX_SHADER, no_sources, vertex_sources_); - } - if (!geometry_sources_.is_empty()) { - program_active_->geom_shader = create_shader_stage( - GL_GEOMETRY_SHADER, no_sources, geometry_sources_); - } - if (!fragment_sources_.is_empty()) { - program_active_->frag_shader = create_shader_stage( - GL_FRAGMENT_SHADER, no_sources, fragment_sources_); - } - if (!compute_sources_.is_empty()) { - program_active_->compute_shader = create_shader_stage( - GL_COMPUTE_SHADER, no_sources, compute_sources_); - } + program.program_link(name); - program_link(); - } - - constants.is_dirty = false; - return program_active_->program_id; + return program; } GLSourcesBaked GLShader::get_sources() @@ -1840,7 +1830,12 @@ Shader *GLShaderCompiler::compile_shader(const shader::ShaderCreateInfo &info) GLCompilerWorker *worker = get_compiler_worker(sources); - if (!worker->load_program_binary(shader->program_active_->program_id) || + /* This path is always called for the default shader compilation. Not for specialization. + * Use the default constant template.*/ + const shader::SpecializationConstants &constants = GPU_shader_get_default_constant_state( + wrap(shader)); + + if (!worker->load_program_binary(shader->program_cache_.lookup(constants.values).program_id) || !shader->post_finalize(&info)) { /* Compilation failed, try to compile it locally. */ @@ -1862,17 +1857,10 @@ void GLShaderCompiler::specialize_shader(ShaderSpecialization &specialization) static std::mutex mutex; GLShader *shader = static_cast(unwrap(specialization.shader)); - Vector &constants = specialization.constants; auto program_get = [&]() -> GLShader::GLProgram * { - for (const SpecializationConstant &constant : constants) { - const ShaderInput *input = shader->interface->constant_get(constant.name.c_str()); - BLI_assert_msg(input != nullptr, "The specialization constant doesn't exists"); - shader->constants.values[input->location].u = constant.value.u; - } - shader->constants.is_dirty = true; - if (shader->program_cache_.contains(shader->constants.values)) { - return &shader->program_cache_.lookup(shader->constants.values); + if (shader->program_cache_.contains(specialization.constants.values)) { + return &shader->program_cache_.lookup(specialization.constants.values); } return nullptr; }; @@ -1882,7 +1870,6 @@ void GLShaderCompiler::specialize_shader(ShaderSpecialization &specialization) GLShader::GLProgram *program = program_get(); glDeleteProgram(program->program_id); program->program_id = 0; - shader->constants.is_dirty = true; }; GLSourcesBaked sources; @@ -1896,7 +1883,7 @@ void GLShaderCompiler::specialize_shader(ShaderSpecialization &specialization) /** WORKAROUND: Set async_compilation to true, so only the sources are generated. */ shader->async_compilation_ = true; - shader->program_get(); + shader->program_get(&specialization.constants); shader->async_compilation_ = false; sources = shader->get_sources(); diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh index 079b94d72f4..268c54904f7 100644 --- a/source/blender/gpu/opengl/gl_shader.hh +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -81,6 +81,8 @@ class GLShader : public Shader { GLuint frag_shader = 0; GLuint compute_shader = 0; + std::mutex compilation_mutex; + GLProgram() {} GLProgram(GLProgram &&other) { @@ -96,16 +98,18 @@ class GLShader : public Shader { other.compute_shader = 0; } ~GLProgram(); + + void program_link(StringRefNull shader_name); }; using GLProgramCacheKey = Vector; + /** Contains all specialized shader variants. */ Map program_cache_; - /** - * Points to the active program. When binding a shader the active program is - * setup. - */ - GLProgram *program_active_ = nullptr; + std::mutex program_cache_mutex_; + + /** Main program instance. This is the default specialized variant that is first compiled. */ + GLProgram *main_program_ = nullptr; /* When true, the shader generates its GLSources but it's not compiled. * (Used for batch compilation) */ @@ -123,29 +127,13 @@ class GLShader : public Shader { Vector specialization_constant_names_; - /** - * Initialize an this instance. - * - * - Ensures that program_cache at least has a default GLProgram. - * - Ensures that active program is set. - * - Active GLProgram has a shader_program (at least in creation state). - * - Does nothing when instance was already initialized. - */ - void init_program(); - void update_program_and_sources(GLSources &stage_sources, MutableSpan sources); /** - * Link the active program. - */ - void program_link(); - bool check_link_status(); - - /** - * Return a GLProgram program id that reflects the current state of shader.constants.values. + * Return a GLProgram that reflects the given `constants_state`. * The returned program_id is in linked state, or an error happened during linking. */ - GLuint program_get(); + GLShader::GLProgram &program_get(const shader::SpecializationConstants *constants_state); /** True if any shader failed to compile. */ bool compilation_failed_ = false; @@ -168,14 +156,14 @@ class GLShader : public Shader { void warm_cache(int /*limit*/) override{}; std::string resources_declare(const shader::ShaderCreateInfo &info) const override; - std::string constants_declare() const; + std::string constants_declare(const shader::SpecializationConstants &constants_state) const; std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const override; std::string fragment_interface_declare(const shader::ShaderCreateInfo &info) const override; std::string geometry_interface_declare(const shader::ShaderCreateInfo &info) const override; std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override; std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override; - void bind() override; + void bind(const shader::SpecializationConstants *constants_state) override; void unbind() override; void uniform_float(int location, int comp_len, int array_size, const float *data) override; @@ -189,7 +177,7 @@ class GLShader : public Shader { if (!compute_sources_.is_empty()) { return true; } - return program_active_->compute_shader != 0; + return main_program_->compute_shader != 0; } GLSourcesBaked get_sources(); @@ -197,10 +185,16 @@ class GLShader : public Shader { private: StringRefNull glsl_patch_get(GLenum gl_stage); + bool has_specialization_constants() const + { + return constants->types.is_empty() == false; + } + /** Create, compile and attach the shader stage to the shader program. */ GLuint create_shader_stage(GLenum gl_stage, MutableSpan sources, - GLSources &gl_sources); + GLSources &gl_sources, + const shader::SpecializationConstants &constants_state); /** * \brief features available on newer implementation such as native barycentric coordinates diff --git a/source/blender/gpu/tests/framebuffer_test.cc b/source/blender/gpu/tests/framebuffer_test.cc index 5673f670da6..8c615a260b4 100644 --- a/source/blender/gpu/tests/framebuffer_test.cc +++ b/source/blender/gpu/tests/framebuffer_test.cc @@ -314,6 +314,8 @@ static void test_framebuffer_multi_viewport() } MEM_freeN(read_data); + GPU_shader_unbind(); + GPU_framebuffer_free(framebuffer); GPU_texture_free(texture); GPU_shader_free(shader); @@ -400,6 +402,8 @@ static void test_framebuffer_subpass_input() EXPECT_EQ(*read_data_b, 0xDEADC0DE); MEM_freeN(read_data_b); + GPU_shader_unbind(); + GPU_framebuffer_free(framebuffer); GPU_texture_free(texture_a); GPU_texture_free(texture_b); diff --git a/source/blender/gpu/tests/specialization_constants_test.cc b/source/blender/gpu/tests/specialization_constants_test.cc index c484845d565..5c291314c49 100644 --- a/source/blender/gpu/tests/specialization_constants_test.cc +++ b/source/blender/gpu/tests/specialization_constants_test.cc @@ -14,8 +14,6 @@ #include "GPU_storage_buffer.hh" #include "GPU_vertex_format.hh" -#include "BLI_math_vector.hh" -#include "BLI_utility_mixins.hh" #include "BLI_vector.hh" #include "gpu_shader_create_info.hh" @@ -44,26 +42,24 @@ struct ShaderSpecializationConst { GPU_storagebuf_bind(ssbo, GPU_shader_get_ssbo_binding(shader, "data_out")); - /* Expect defaults. */ - float_in = 2; - uint_in = 3; - int_in = 4; - bool_in = true; - - this->validate(); - /* Test values. */ float_in = 52; uint_in = 324; int_in = 455; bool_in = false; - GPU_shader_constant_float(shader, "float_in", float_in); - GPU_shader_constant_uint(shader, "uint_in", uint_in); - GPU_shader_constant_int(shader, "int_in", int_in); - GPU_shader_constant_bool(shader, "bool_in", bool_in); + int float_in_loc = GPU_shader_get_constant(shader, "float_in"); + int uint_in_loc = GPU_shader_get_constant(shader, "uint_in"); + int int_in_loc = GPU_shader_get_constant(shader, "int_in"); + int bool_in_loc = GPU_shader_get_constant(shader, "bool_in"); - this->validate(); + shader::SpecializationConstants constants = GPU_shader_get_default_constant_state(shader); + constants.set_value(float_in_loc, float_in); + constants.set_value(uint_in_loc, uint_in); + constants.set_value(int_in_loc, int_in); + constants.set_value(bool_in_loc, bool_in); + + this->validate(constants); GPU_render_end(); } @@ -94,7 +90,7 @@ struct ShaderSpecializationConst { EXPECT_NE(shader, nullptr); } - void validate() + void validate(shader::SpecializationConstants &constants) { if (is_graphic) { GPUFrameBuffer *fb = GPU_framebuffer_create("test_fb"); @@ -108,14 +104,14 @@ struct ShaderSpecializationConst { GPU_vertbuf_data_alloc(*verts, 1); Batch *batch = GPU_batch_create_ex(GPU_PRIM_POINTS, verts, nullptr, GPU_BATCH_OWNS_VBO); - GPU_batch_set_shader(batch, shader); + GPU_batch_set_shader(batch, shader, &constants); GPU_batch_draw_advanced(batch, 0, 1, 0, 1); GPU_batch_discard(batch); GPU_framebuffer_free(fb); } else { - GPU_compute_dispatch(shader, 1, 1, 1); + GPU_compute_dispatch(shader, 1, 1, 1, &constants); } GPU_finish(); diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc index 2981d2a757a..42b43ad26a5 100644 --- a/source/blender/gpu/vulkan/vk_context.cc +++ b/source/blender/gpu/vulkan/vk_context.cc @@ -276,16 +276,17 @@ void VKContext::update_pipeline_data(GPUPrimType primitive, { VKShader &vk_shader = unwrap(*shader); VKFrameBuffer &framebuffer = *active_framebuffer_get(); - update_pipeline_data( - vk_shader, - vk_shader.ensure_and_get_graphics_pipeline(primitive, vao, state_manager_get(), framebuffer), - r_pipeline_data); + update_pipeline_data(vk_shader, + vk_shader.ensure_and_get_graphics_pipeline( + primitive, vao, state_manager_get(), framebuffer, constants_state_), + r_pipeline_data); } void VKContext::update_pipeline_data(render_graph::VKPipelineData &r_pipeline_data) { VKShader &vk_shader = unwrap(*shader); - update_pipeline_data(vk_shader, vk_shader.ensure_and_get_compute_pipeline(), r_pipeline_data); + update_pipeline_data( + vk_shader, vk_shader.ensure_and_get_compute_pipeline(constants_state_), r_pipeline_data); } void VKContext::update_pipeline_data(VKShader &vk_shader, @@ -401,6 +402,13 @@ void VKContext::swap_buffers_post_handler() sync_backbuffer(true); } +void VKContext::specialization_constants_set( + const shader::SpecializationConstants *constants_state) +{ + constants_state_ = (constants_state != nullptr) ? *constants_state : + shader::SpecializationConstants{}; +} + /** \} */ /* -------------------------------------------------------------------- */ diff --git a/source/blender/gpu/vulkan/vk_context.hh b/source/blender/gpu/vulkan/vk_context.hh index fea16fe0c79..95739c68892 100644 --- a/source/blender/gpu/vulkan/vk_context.hh +++ b/source/blender/gpu/vulkan/vk_context.hh @@ -52,6 +52,9 @@ class VKContext : public Context, NonCopyable { std::optional> thread_data_; std::optional> render_graph_; + /* Active shader specialization constants state. */ + shader::SpecializationConstants constants_state_; + public: VKDiscardPool discard_pool; @@ -135,6 +138,8 @@ class VKContext : public Context, NonCopyable { static void openxr_acquire_framebuffer_image_callback(GHOST_VulkanOpenXRData *data); static void openxr_release_framebuffer_image_callback(GHOST_VulkanOpenXRData *data); + void specialization_constants_set(const shader::SpecializationConstants *constants_state); + private: void swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &data); void swap_buffers_post_handler(); diff --git a/source/blender/gpu/vulkan/vk_shader.cc b/source/blender/gpu/vulkan/vk_shader.cc index ea76aba029f..258b3fb2619 100644 --- a/source/blender/gpu/vulkan/vk_shader.cc +++ b/source/blender/gpu/vulkan/vk_shader.cc @@ -645,7 +645,9 @@ bool VKShader::finalize_post() * step for graphical shaders. */ if (result && is_compute_shader_) { - ensure_and_get_compute_pipeline(); + /* This is only done for the first shader compilation (not specialization). + * Give the default constants. */ + ensure_and_get_compute_pipeline(*constants); } return result; } @@ -732,8 +734,12 @@ bool VKShader::finalize_descriptor_set_layouts(VKDevice &vk_device, return vk_descriptor_set_layout_ != VK_NULL_HANDLE; } -void VKShader::bind() +void VKShader::bind(const shader::SpecializationConstants *constants_state) { + VKContext *ctx = VKContext::get(); + /* Copy constants state. */ + ctx->specialization_constants_set(constants_state); + /* Intentionally empty. Binding of the pipeline are done just before drawing/dispatching. * See #VKPipeline.update_and_bind */ } @@ -1294,7 +1300,8 @@ bool VKShader::do_geometry_shader_injection(const shader::ShaderCreateInfo *info /** \} */ -VkPipeline VKShader::ensure_and_get_compute_pipeline() +VkPipeline VKShader::ensure_and_get_compute_pipeline( + const shader::SpecializationConstants &constants_state) { BLI_assert(is_compute_shader_); BLI_assert(compute_module.vk_shader_module != VK_NULL_HANDLE); @@ -1302,12 +1309,12 @@ VkPipeline VKShader::ensure_and_get_compute_pipeline() /* Early exit when no specialization constants are used and the vk_pipeline_base_ is already * valid. This would handle most cases. */ - if (constants.values.is_empty() && vk_pipeline_base_ != VK_NULL_HANDLE) { + if (constants_state.values.is_empty() && vk_pipeline_base_ != VK_NULL_HANDLE) { return vk_pipeline_base_; } VKComputeInfo compute_info = {}; - compute_info.specialization_constants.extend(constants.values); + compute_info.specialization_constants.extend(constants_state.values); compute_info.vk_shader_module = compute_module.vk_shader_module; compute_info.vk_pipeline_layout = vk_pipeline_layout; @@ -1325,7 +1332,8 @@ VkPipeline VKShader::ensure_and_get_compute_pipeline() VkPipeline VKShader::ensure_and_get_graphics_pipeline(GPUPrimType primitive, VKVertexAttributeObject &vao, VKStateManager &state_manager, - VKFrameBuffer &framebuffer) + VKFrameBuffer &framebuffer, + SpecializationConstants &constants_state) { BLI_assert(!is_compute_shader_); BLI_assert_msg( @@ -1336,7 +1344,7 @@ VkPipeline VKShader::ensure_and_get_graphics_pipeline(GPUPrimType primitive, /* TODO: Graphics info should be cached in VKContext and only the changes should be applied. */ VKGraphicsInfo graphics_info = {}; - graphics_info.specialization_constants.extend(constants.values); + graphics_info.specialization_constants.extend(constants_state.values); graphics_info.vk_pipeline_layout = vk_pipeline_layout; graphics_info.vertex_in.vk_topology = to_vk_primitive_topology(primitive); diff --git a/source/blender/gpu/vulkan/vk_shader.hh b/source/blender/gpu/vulkan/vk_shader.hh index dfe80e830a1..ca6a090fade 100644 --- a/source/blender/gpu/vulkan/vk_shader.hh +++ b/source/blender/gpu/vulkan/vk_shader.hh @@ -76,7 +76,7 @@ class VKShader : public Shader { bool is_ready() const; void warm_cache(int limit) override; - void bind() override; + void bind(const shader::SpecializationConstants *constants_state) override; void unbind() override; void uniform_float(int location, int comp_len, int array_size, const float *data) override; @@ -89,11 +89,13 @@ class VKShader : public Shader { std::string geometry_layout_declare(const shader::ShaderCreateInfo &info) const override; std::string compute_layout_declare(const shader::ShaderCreateInfo &info) const override; - VkPipeline ensure_and_get_compute_pipeline(); + VkPipeline ensure_and_get_compute_pipeline( + const shader::SpecializationConstants &constants_state); VkPipeline ensure_and_get_graphics_pipeline(GPUPrimType primitive, VKVertexAttributeObject &vao, VKStateManager &state_manager, - VKFrameBuffer &framebuffer); + VKFrameBuffer &framebuffer, + shader::SpecializationConstants &constants_state); const VKShaderInterface &interface_get() const;