Vulkan: Add CPU profiling

This has limited use cases since it does not
profile the heavy parts of the Vulkan backend.

Almost a 1:1 port of the Metal implementation from #139551.
Does not cover render graph submission or GPU timings.

Pull Request: https://projects.blender.org/blender/blender/pulls/139899
Author: Clément Foucault
Date: 2025-06-06 14:39:51 +02:00
Committed by: Clément Foucault
Parent: a883f555ae
Commit: 023865b314
4 changed files with 109 additions and 2 deletions
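
For orientation before the diff itself: the mechanism being ported is small. When G.profile_gpu is enabled, every debug group begin/end pair is additionally timed on the CPU with a monotonic clock, and the collected spans are flushed to the profile report once per frame. The snippet below is a minimal, self-contained sketch of that idea only, assuming a single context and a single thread; none of the names (ScopeSpan, group_begin, process_frame) are Blender API.

#include <chrono>
#include <iostream>
#include <string>
#include <vector>

/* Illustrative stand-ins only; none of these names exist in Blender. */
struct ScopeSpan {
  std::string name;
  bool finished;
  std::chrono::steady_clock::time_point cpu_start, cpu_end;
};

static std::vector<ScopeSpan> scopes;

void group_begin(const char *name)
{
  scopes.push_back({name, false, std::chrono::steady_clock::now(), {}});
}

void group_end()
{
  /* Close the innermost scope that is still open (groups nest LIFO). */
  for (auto it = scopes.rbegin(); it != scopes.rend(); ++it) {
    if (!it->finished) {
      it->finished = true;
      it->cpu_end = std::chrono::steady_clock::now();
      return;
    }
  }
  std::cout << "Extra group_end() call\n";
}

void process_frame()
{
  for (const ScopeSpan &span : scopes) {
    if (!span.finished) {
      std::cout << "Missing group_end() for " << span.name << "\n";
      continue;
    }
    const auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(span.cpu_end -
                                                                         span.cpu_start);
    std::cout << span.name << ": " << ns.count() << " ns\n";
  }
  scopes.clear();
}

A caller would bracket work with group_begin("draw")/group_end() and call process_frame() once per frame, which mirrors how end_frame() and the context destructor drive process_frame_timings() in the last file of this diff.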

View File

@@ -19,6 +19,7 @@
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "gpu_profile_report.hh"
#include "gpu_shader_create_info.hh"
#include "gpu_shader_create_info_private.hh"
#include "gpu_shader_dependency_private.hh"
@@ -805,14 +806,22 @@ void Shader::set_framebuffer_srgb_target(int use_srgb_to_linear)
Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
{
using Clock = std::chrono::steady_clock;
using TimePoint = Clock::time_point;
using namespace blender::gpu::shader;
const_cast<ShaderCreateInfo &>(info).finalize();
TimePoint start_time;
if (Context::get()) {
/* Context can be null in Vulkan compilation threads. */
GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP);
GPU_debug_group_begin(info.name_.c_str());
}
else {
start_time = Clock::now();
}
const std::string error = info.check_error();
if (!error.empty()) {
@@ -936,6 +945,16 @@ Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_ba
GPU_debug_group_end();
GPU_debug_group_end();
}
else {
TimePoint end_time = Clock::now();
/* Note: used by the Vulkan backend. Uses the same time_since_epoch() time base as process_frame_timings(). */
ProfileReport::get().add_group_cpu(GPU_DEBUG_SHADER_COMPILATION_GROUP,
start_time.time_since_epoch().count(),
end_time.time_since_epoch().count());
ProfileReport::get().add_group_cpu(info.name_.c_str(),
start_time.time_since_epoch().count(),
end_time.time_since_epoch().count());
}
return shader;
}
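
The else branch above handles compilation from a worker thread where no GPU context (and thus no debug-group path) exists: the span is measured directly and both endpoints are passed as raw steady_clock::time_since_epoch() counts so they share the same monotonic time base as the per-frame scope timings. A reduced sketch of that convention follows; record_span() is a hypothetical stand-in for ProfileReport::add_group_cpu().

#include <chrono>
#include <cstdint>
#include <iostream>

/* Hypothetical stand-in for ProfileReport::add_group_cpu(): endpoints arrive
 * as raw steady_clock ticks, so every CPU span shares one time base. */
static void record_span(const char *name, int64_t start_ticks, int64_t end_ticks)
{
  std::cout << name << ": " << (end_ticks - start_ticks) << " ticks\n";
}

static void compile_on_worker_thread()
{
  using Clock = std::chrono::steady_clock;
  const Clock::time_point start_time = Clock::now();
  /* ... translate sources and build the shader ... */
  const Clock::time_point end_time = Clock::now();
  record_span("shader_compilation",
              start_time.time_since_epoch().count(),
              end_time.time_since_epoch().count());
}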

View File

@@ -45,6 +45,8 @@ VKContext::~VKContext()
free_resources();
VKBackend::get().device.context_unregister(*this);
this->process_frame_timings();
imm = nullptr;
}
@@ -108,8 +110,13 @@ void VKContext::activate()
if (!render_graph_.has_value()) {
render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
*device.render_graph_new());
/* Recreate the debug group stack for the new graph.
* Note: there is no associated `debug_group_end` as the graph groups
* are implicitly closed on submission. */
for (const StringRef &group : debug_stack) {
  std::string str_group = group;
  render_graph_.value().get().debug_group_begin(str_group.c_str(),
                                                debug::get_debug_group_color(str_group));
}
}
@@ -138,6 +145,7 @@ void VKContext::end_frame()
{
VKDevice &device = VKBackend::get().device;
device.orphaned_data.destroy_discarded_resources(device);
this->process_frame_timings();
}
void VKContext::flush()
@@ -175,8 +183,13 @@ TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags,
if (bool(flags & RenderGraphFlushFlags::RENEW_RENDER_GRAPH)) {
render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
*device.render_graph_new());
/* Recreate the debug group stack for the new graph.
* Note: there is no associated `debug_group_end` as the graph groups
* are implicitly closed on submission. */
for (const StringRef &group : debug_stack) {
  std::string str_group = group;
  render_graph_.value().get().debug_group_begin(str_group.c_str(),
                                                debug::get_debug_group_color(str_group));
}
}
return timeline;
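
Both hunks in this file replay the debug-group names that are still open onto a freshly created render graph by calling the graph's own debug_group_begin() instead of VKContext::debug_group_begin(). This matters because the context-level call now also records a CPU timing, and a mid-frame graph renewal must not append duplicate timing entries for groups that are already being timed. A reduced sketch of that distinction, with invented names (GraphSink, Profiler), not Blender API:

#include <string>
#include <vector>

/* Invented stand-ins: GraphSink plays the role of the render graph's own
 * debug labels, Profiler plays the role of the context-level CPU timings. */
struct GraphSink {
  std::vector<std::string> labels;
  void group_begin(const std::string &name) { labels.push_back(name); }
};

struct Profiler {
  std::vector<std::string> open_stack; /* Group names currently open. */
  std::vector<std::string> cpu_timed;  /* Scopes that receive CPU timings. */

  /* Normal path: starts a CPU timing *and* labels the graph. */
  void group_begin(GraphSink &graph, const std::string &name)
  {
    open_stack.push_back(name);
    cpu_timed.push_back(name);
    graph.group_begin(name);
  }

  /* Graph renewal: replay open names onto the new graph only, so no
   * duplicate CPU timing entries are created. */
  void replay_onto(GraphSink &new_graph)
  {
    for (const std::string &name : open_stack) {
      new_graph.group_begin(name);
    }
  }
};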

View File

@@ -55,6 +55,21 @@ class VKContext : public Context, NonCopyable {
/* Active shader specialization constants state. */
shader::SpecializationConstants constants_state_;
/* Debug scope timings. Adapted from GLContext::TimeQuery.
* Only supports CPU timings for now. */
struct ScopeTimings {
using Clock = std::chrono::steady_clock;
using TimePoint = Clock::time_point;
using Nanoseconds = std::chrono::nanoseconds;
std::string name;
bool finished;
TimePoint cpu_start, cpu_end;
};
Vector<ScopeTimings> scope_timings;
void process_frame_timings();
public:
VKDiscardPool discard_pool;

View File

@@ -16,17 +16,77 @@
#include "vk_debug.hh"
#include "vk_to_string.hh"
#include "gpu_profile_report.hh"
static CLG_LogRef LOG = {"gpu.vulkan"};
namespace blender::gpu {
void VKContext::debug_group_begin(const char *name, int)
{
render_graph().debug_group_begin(name, debug::get_debug_group_color(name));
if (!G.profile_gpu) {
return;
}
ScopeTimings timings = {};
timings.name = name;
timings.finished = false;
timings.cpu_start = ScopeTimings::Clock::now();
scope_timings.append(timings);
}
void VKContext::debug_group_end()
{
render_graph().debug_group_end();
if (!G.profile_gpu) {
return;
}
for (int i = scope_timings.size() - 1; i >= 0; i--) {
ScopeTimings &query = scope_timings[i];
if (!query.finished) {
query.finished = true;
query.cpu_end = ScopeTimings::Clock::now();
break;
}
if (i == 0) {
std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n";
}
}
}
void VKContext::process_frame_timings()
{
if (!G.profile_gpu) {
return;
}
Vector<ScopeTimings> &queries = scope_timings;
bool frame_is_valid = !queries.is_empty();
for (int i = queries.size() - 1; i >= 0; i--) {
if (!queries[i].finished) {
frame_is_valid = false;
std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n";
}
break;
}
if (!frame_is_valid) {
return;
}
for (ScopeTimings &query : queries) {
ProfileReport::get().add_group_cpu(query.name,
query.cpu_start.time_since_epoch().count(),
query.cpu_end.time_since_epoch().count());
}
queries.clear();
}
bool VKContext::debug_capture_begin(const char *title)
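
On the caller side nothing changes: the timings above are driven entirely by the existing GPU_debug_group_begin()/GPU_debug_group_end() pairs, e.g. (labels are arbitrary placeholders):

GPU_debug_group_begin("MyPass");
GPU_debug_group_begin("MySubPass");
/* ... record draws / dispatches ... */
GPU_debug_group_end();
GPU_debug_group_end();

When G.profile_gpu is disabled, debug_group_begin() and debug_group_end() return right after forwarding the label to the render graph, so the extra cost is a branch; when enabled, the accumulated spans are flushed by process_frame_timings() at end_frame() and on context destruction, as shown in the earlier context hunks of this diff.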