From 023865b3147fe42bdff42dcb0f76ed1283c7827e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Fri, 6 Jun 2025 14:39:51 +0200 Subject: [PATCH 1/5] Vulkan: Add CPU profiling This has limited use cases since it doesn't profile the heavy part of the vulkan backend. Almost 1:1 port of the metal implementation from #139551. Doesn't cover rendergraph submission nor GPU timings. Pull Request: https://projects.blender.org/blender/blender/pulls/139899 --- source/blender/gpu/intern/gpu_shader.cc | 19 ++++++++ source/blender/gpu/vulkan/vk_context.cc | 17 ++++++- source/blender/gpu/vulkan/vk_context.hh | 15 +++++++ source/blender/gpu/vulkan/vk_debug.cc | 60 +++++++++++++++++++++++++ 4 files changed, 109 insertions(+), 2 deletions(-) diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc index 4b595ecdea4..f083461441e 100644 --- a/source/blender/gpu/intern/gpu_shader.cc +++ b/source/blender/gpu/intern/gpu_shader.cc @@ -19,6 +19,7 @@ #include "gpu_backend.hh" #include "gpu_context_private.hh" +#include "gpu_profile_report.hh" #include "gpu_shader_create_info.hh" #include "gpu_shader_create_info_private.hh" #include "gpu_shader_dependency_private.hh" @@ -805,14 +806,22 @@ void Shader::set_framebuffer_srgb_target(int use_srgb_to_linear) Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation) { + using Clock = std::chrono::steady_clock; + using TimePoint = Clock::time_point; + using namespace blender::gpu::shader; const_cast(info).finalize(); + TimePoint start_time; + if (Context::get()) { /* Context can be null in Vulkan compilation threads. */ GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP); GPU_debug_group_begin(info.name_.c_str()); } + else { + start_time = Clock::now(); + } const std::string error = info.check_error(); if (!error.empty()) { @@ -936,6 +945,16 @@ Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_ba GPU_debug_group_end(); GPU_debug_group_end(); } + else { + TimePoint end_time = Clock::now(); + /* Note: Used by the vulkan backend. Use the same time_since_epoch as process_frame_timings. */ + ProfileReport::get().add_group_cpu(GPU_DEBUG_SHADER_COMPILATION_GROUP, + start_time.time_since_epoch().count(), + end_time.time_since_epoch().count()); + ProfileReport::get().add_group_cpu(info.name_.c_str(), + start_time.time_since_epoch().count(), + end_time.time_since_epoch().count()); + } return shader; } diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc index ffbb4a3e0a9..dd1860e153f 100644 --- a/source/blender/gpu/vulkan/vk_context.cc +++ b/source/blender/gpu/vulkan/vk_context.cc @@ -45,6 +45,8 @@ VKContext::~VKContext() free_resources(); VKBackend::get().device.context_unregister(*this); + this->process_frame_timings(); + imm = nullptr; } @@ -108,8 +110,13 @@ void VKContext::activate() if (!render_graph_.has_value()) { render_graph_ = std::reference_wrapper( *device.render_graph_new()); + /* Recreate the debug group stack for the new graph. + * Note: there is no associated `debug_group_end` as the graph groups + * are implicitly closed on submission. */ for (const StringRef &group : debug_stack) { - debug_group_begin(std::string(group).c_str(), 0); + std::string str_group = group; + render_graph_.value().get().debug_group_begin(str_group.c_str(), + debug::get_debug_group_color(str_group)); } } @@ -138,6 +145,7 @@ void VKContext::end_frame() { VKDevice &device = VKBackend::get().device; device.orphaned_data.destroy_discarded_resources(device); + this->process_frame_timings(); } void VKContext::flush() @@ -175,8 +183,13 @@ TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags, if (bool(flags & RenderGraphFlushFlags::RENEW_RENDER_GRAPH)) { render_graph_ = std::reference_wrapper( *device.render_graph_new()); + /* Recreate the debug group stack for the new graph. + * Note: there is no associated `debug_group_end` as the graph groups + * are implicitly closed on submission. */ for (const StringRef &group : debug_stack) { - debug_group_begin(std::string(group).c_str(), 0); + std::string str_group = group; + render_graph_.value().get().debug_group_begin(str_group.c_str(), + debug::get_debug_group_color(str_group)); } } return timeline; diff --git a/source/blender/gpu/vulkan/vk_context.hh b/source/blender/gpu/vulkan/vk_context.hh index 95739c68892..56978dea4de 100644 --- a/source/blender/gpu/vulkan/vk_context.hh +++ b/source/blender/gpu/vulkan/vk_context.hh @@ -55,6 +55,21 @@ class VKContext : public Context, NonCopyable { /* Active shader specialization constants state. */ shader::SpecializationConstants constants_state_; + /* Debug scope timings. Adapted form GLContext::TimeQuery. + * Only supports CPU timings for now. */ + struct ScopeTimings { + using Clock = std::chrono::steady_clock; + using TimePoint = Clock::time_point; + using Nanoseconds = std::chrono::nanoseconds; + + std::string name; + bool finished; + TimePoint cpu_start, cpu_end; + }; + Vector scope_timings; + + void process_frame_timings(); + public: VKDiscardPool discard_pool; diff --git a/source/blender/gpu/vulkan/vk_debug.cc b/source/blender/gpu/vulkan/vk_debug.cc index 340efae0d19..8d7b0765ecf 100644 --- a/source/blender/gpu/vulkan/vk_debug.cc +++ b/source/blender/gpu/vulkan/vk_debug.cc @@ -16,17 +16,77 @@ #include "vk_debug.hh" #include "vk_to_string.hh" +#include "gpu_profile_report.hh" + static CLG_LogRef LOG = {"gpu.vulkan"}; namespace blender::gpu { void VKContext::debug_group_begin(const char *name, int) { render_graph().debug_group_begin(name, debug::get_debug_group_color(name)); + + if (!G.profile_gpu) { + return; + } + + ScopeTimings timings = {}; + timings.name = name; + timings.finished = false; + timings.cpu_start = ScopeTimings::Clock::now(); + + scope_timings.append(timings); } void VKContext::debug_group_end() { render_graph().debug_group_end(); + + if (!G.profile_gpu) { + return; + } + + for (int i = scope_timings.size() - 1; i >= 0; i--) { + ScopeTimings &query = scope_timings[i]; + if (!query.finished) { + query.finished = true; + query.cpu_end = ScopeTimings::Clock::now(); + break; + } + if (i == 0) { + std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n"; + } + } +} + +void VKContext::process_frame_timings() +{ + if (!G.profile_gpu) { + return; + } + + Vector &queries = scope_timings; + + bool frame_is_valid = !queries.is_empty(); + + for (int i = queries.size() - 1; i >= 0; i--) { + if (!queries[i].finished) { + frame_is_valid = false; + std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n"; + } + break; + } + + if (!frame_is_valid) { + return; + } + + for (ScopeTimings &query : queries) { + ProfileReport::get().add_group_cpu(query.name, + query.cpu_start.time_since_epoch().count(), + query.cpu_end.time_since_epoch().count()); + } + + queries.clear(); } bool VKContext::debug_capture_begin(const char *title) From e0e528913835ca2e36823408b231374411cd3618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Fri, 6 Jun 2025 14:57:36 +0200 Subject: [PATCH 2/5] Cleanup: GPU: Use CLOG instead of std::cout for profile error --- source/blender/gpu/metal/mtl_debug.mm | 4 ++-- source/blender/gpu/opengl/gl_debug.cc | 4 ++-- source/blender/gpu/vulkan/vk_debug.cc | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/blender/gpu/metal/mtl_debug.mm b/source/blender/gpu/metal/mtl_debug.mm index dc95e9c4518..85a17220746 100644 --- a/source/blender/gpu/metal/mtl_debug.mm +++ b/source/blender/gpu/metal/mtl_debug.mm @@ -83,7 +83,7 @@ void MTLContext::debug_group_end() break; } if (i == 0) { - std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n"; + CLOG_ERROR(&LOG, "Profile GPU error: Extra GPU_debug_group_end() call.\n"); } } } @@ -104,7 +104,7 @@ void MTLContext::process_frame_timings() for (int i = queries.size() - 1; i >= 0; i--) { if (!queries[i].finished) { frame_is_valid = false; - std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n"; + CLOG_ERROR(&LOG, "Profile GPU error: Missing GPU_debug_group_end() call\n"); } break; } diff --git a/source/blender/gpu/opengl/gl_debug.cc b/source/blender/gpu/opengl/gl_debug.cc index 98012f52ae2..8e790661437 100644 --- a/source/blender/gpu/opengl/gl_debug.cc +++ b/source/blender/gpu/opengl/gl_debug.cc @@ -441,7 +441,7 @@ void GLContext::debug_group_end() break; } if (i == 0) { - std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n"; + CLOG_ERROR(&LOG, "Profile GPU error: Extra GPU_debug_group_end() call.\n"); } } } @@ -461,7 +461,7 @@ void GLContext::process_frame_timings() for (int i = queries.size() - 1; i >= 0; i--) { if (!queries[i].finished) { frame_is_valid = false; - std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n"; + CLOG_ERROR(&LOG, "Profile GPU error: Missing GPU_debug_group_end() call\n"); } else { glGetQueryObjectiv(queries.last().handle_end, GL_QUERY_RESULT_AVAILABLE, &frame_is_ready); diff --git a/source/blender/gpu/vulkan/vk_debug.cc b/source/blender/gpu/vulkan/vk_debug.cc index 8d7b0765ecf..4af5b3e5452 100644 --- a/source/blender/gpu/vulkan/vk_debug.cc +++ b/source/blender/gpu/vulkan/vk_debug.cc @@ -53,7 +53,7 @@ void VKContext::debug_group_end() break; } if (i == 0) { - std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n"; + CLOG_ERROR(&LOG, "Profile GPU error: Extra GPU_debug_group_end() call.\n"); } } } @@ -71,7 +71,7 @@ void VKContext::process_frame_timings() for (int i = queries.size() - 1; i >= 0; i--) { if (!queries[i].finished) { frame_is_valid = false; - std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n"; + CLOG_ERROR(&LOG, "Profile GPU error: Missing GPU_debug_group_end() call\n"); } break; } From 4a88c14d9565c76aceb96bdd3be23831ac99253d Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Fri, 6 Jun 2025 16:02:47 +0200 Subject: [PATCH 3/5] Fix #139917: Vulkan: Increase number of render frames The number of render frames influences how long discarded memory will be kept. There are platforms that require 5 frames to work succesfully. Pull Request: https://projects.blender.org/blender/blender/pulls/139934 --- intern/ghost/intern/GHOST_ContextVK.hh | 2 +- source/blender/gpu/vulkan/vk_device.hh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/intern/ghost/intern/GHOST_ContextVK.hh b/intern/ghost/intern/GHOST_ContextVK.hh index e1081e71e55..73dccfd30ea 100644 --- a/intern/ghost/intern/GHOST_ContextVK.hh +++ b/intern/ghost/intern/GHOST_ContextVK.hh @@ -98,7 +98,7 @@ struct GHOST_Frame { * Vulkan backend. Notably, VKThreadData::resource_pools_count must * match this value. */ -constexpr static uint32_t GHOST_FRAMES_IN_FLIGHT = 4; +constexpr static uint32_t GHOST_FRAMES_IN_FLIGHT = 5; class GHOST_ContextVK : public GHOST_Context { friend class GHOST_XrGraphicsBindingVulkan; diff --git a/source/blender/gpu/vulkan/vk_device.hh b/source/blender/gpu/vulkan/vk_device.hh index fc3d85932d0..51cfeac2667 100644 --- a/source/blender/gpu/vulkan/vk_device.hh +++ b/source/blender/gpu/vulkan/vk_device.hh @@ -102,7 +102,7 @@ class VKThreadData : public NonCopyable, NonMovable { * in flight used by GHOST. Therefore, this constant *must* always * match GHOST_ContextVK's GHOST_FRAMES_IN_FLIGHT. */ - static constexpr uint32_t resource_pools_count = 4; + static constexpr uint32_t resource_pools_count = 5; public: /** Thread ID this instance belongs to. */ From 9c305ef52260e82e8ab1cd039634975a92c5937e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Fri, 6 Jun 2025 16:25:19 +0200 Subject: [PATCH 4/5] Fix: GPU: Metal: Compilation error --- source/blender/gpu/metal/mtl_debug.mm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/blender/gpu/metal/mtl_debug.mm b/source/blender/gpu/metal/mtl_debug.mm index 85a17220746..56066e2700f 100644 --- a/source/blender/gpu/metal/mtl_debug.mm +++ b/source/blender/gpu/metal/mtl_debug.mm @@ -83,7 +83,7 @@ void MTLContext::debug_group_end() break; } if (i == 0) { - CLOG_ERROR(&LOG, "Profile GPU error: Extra GPU_debug_group_end() call.\n"); + CLOG_ERROR(&debug::LOG, "Profile GPU error: Extra GPU_debug_group_end() call.\n"); } } } @@ -104,7 +104,7 @@ void MTLContext::process_frame_timings() for (int i = queries.size() - 1; i >= 0; i--) { if (!queries[i].finished) { frame_is_valid = false; - CLOG_ERROR(&LOG, "Profile GPU error: Missing GPU_debug_group_end() call\n"); + CLOG_ERROR(&debug::LOG, "Profile GPU error: Missing GPU_debug_group_end() call\n"); } break; } From 36f11027566e5a995a23e14c68969178a2c74c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Fri, 6 Jun 2025 16:27:18 +0200 Subject: [PATCH 5/5] GPU: GLProgram: Use unique pointer instead of movable object This avoid issue with not moving the `std::mutex` that was added in #136991. Issue reported here https://projects.blender.org/blender/blender/pulls/139821#issuecomment-1592962 Pull Request: https://projects.blender.org/blender/blender/pulls/139937 --- source/blender/gpu/opengl/gl_shader.cc | 17 ++++++++++++----- source/blender/gpu/opengl/gl_shader.hh | 15 +-------------- 2 files changed, 13 insertions(+), 19 deletions(-) diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index 1ee9898a0ae..7c46b2bd634 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -77,7 +77,10 @@ void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat /* NOTE: This is not threadsafe with regards to the specialization constants state access. * The shader creation must be externally synchronized. */ - main_program_ = &program_cache_.lookup_or_add_default(constants->values); + main_program_ = program_cache_ + .lookup_or_add_cb(constants->values, + []() { return std::make_unique(); }) + .get(); if (!main_program_->program_id) { main_program_->program_id = glCreateProgram(); debug::object_label(GL_PROGRAM, main_program_->program_id, name); @@ -86,7 +89,10 @@ void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat void GLShader::init() { - main_program_ = &program_cache_.lookup_or_add_default(constants->values); + main_program_ = program_cache_ + .lookup_or_add_cb(constants->values, + []() { return std::make_unique(); }) + .get(); if (!main_program_->program_id) { main_program_->program_id = glCreateProgram(); debug::object_label(GL_PROGRAM, main_program_->program_id, name); @@ -1612,7 +1618,8 @@ GLShader::GLProgram &GLShader::program_get(const shader::SpecializationConstants program_cache_mutex_.lock(); - GLProgram &program = program_cache_.lookup_or_add_default(constants_state->values); + GLProgram &program = *program_cache_.lookup_or_add_cb( + constants_state->values, []() { return std::make_unique(); }); program_cache_mutex_.unlock(); @@ -1881,7 +1888,7 @@ Shader *GLSubprocessShaderCompiler::compile_shader(const shader::ShaderCreateInf const shader::SpecializationConstants &constants = GPU_shader_get_default_constant_state( wrap(shader)); - if (!worker->load_program_binary(shader->program_cache_.lookup(constants.values).program_id) || + if (!worker->load_program_binary(shader->program_cache_.lookup(constants.values)->program_id) || !shader->post_finalize(&info)) { /* Compilation failed, try to compile it locally. */ @@ -1908,7 +1915,7 @@ void GLSubprocessShaderCompiler::specialize_shader(ShaderSpecialization &special auto program_get = [&]() -> GLShader::GLProgram * { if (shader->program_cache_.contains(specialization.constants.values)) { - return &shader->program_cache_.lookup(specialization.constants.values); + return shader->program_cache_.lookup(specialization.constants.values).get(); } return nullptr; }; diff --git a/source/blender/gpu/opengl/gl_shader.hh b/source/blender/gpu/opengl/gl_shader.hh index 6500bdc54b6..2d1257c2380 100644 --- a/source/blender/gpu/opengl/gl_shader.hh +++ b/source/blender/gpu/opengl/gl_shader.hh @@ -85,19 +85,6 @@ class GLShader : public Shader { std::mutex compilation_mutex; GLProgram() {} - GLProgram(GLProgram &&other) - { - program_id = other.program_id; - vert_shader = other.vert_shader; - geom_shader = other.geom_shader; - frag_shader = other.frag_shader; - compute_shader = other.compute_shader; - other.program_id = 0; - other.vert_shader = 0; - other.geom_shader = 0; - other.frag_shader = 0; - other.compute_shader = 0; - } ~GLProgram(); void program_link(StringRefNull shader_name); @@ -105,7 +92,7 @@ class GLShader : public Shader { using GLProgramCacheKey = Vector; /** Contains all specialized shader variants. */ - Map program_cache_; + Map> program_cache_; std::mutex program_cache_mutex_;