From 7245262de896c63231b6921cab40378edd5794be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cle=CC=81ment=20Foucault?= Date: Wed, 28 May 2025 21:08:38 +0200 Subject: [PATCH] GPU: Metal: Add `--profile-gpu` support for CPU timing The GPU implementation is a bit too complex to implement for now. As we are improving shader loading, having the CPU timings is already helpful. Note that `Map` does not compile on Clang. This is exposing the `--profile-gpu` option on all backends as the vulkan backend should follow shortly. Pull Request: https://projects.blender.org/blender/blender/pulls/139551 --- .../blender/gpu/intern/gpu_profile_report.hh | 20 +++++- source/blender/gpu/metal/mtl_context.hh | 18 ++++++ source/blender/gpu/metal/mtl_context.mm | 4 ++ source/blender/gpu/metal/mtl_debug.mm | 63 +++++++++++++++++++ source/creator/creator_args.cc | 6 +- 5 files changed, 106 insertions(+), 5 deletions(-) diff --git a/source/blender/gpu/intern/gpu_profile_report.hh b/source/blender/gpu/intern/gpu_profile_report.hh index c69ce5f3d29..5bfec4eef08 100644 --- a/source/blender/gpu/intern/gpu_profile_report.hh +++ b/source/blender/gpu/intern/gpu_profile_report.hh @@ -2,6 +2,8 @@ * * SPDX-License-Identifier: GPL-2.0-or-later */ +#pragma once + #include "BLI_map.hh" #include "BLI_mutex.hh" #include "BLI_string_ref.hh" @@ -19,7 +21,7 @@ class ProfileReport { private: std::fstream _report; Mutex _mutex; - Map _thread_ids; + Map _thread_ids; ProfileReport() { @@ -69,6 +71,22 @@ class ProfileReport { (cpu_end - cpu_start) / uint64_t(1000), thread_id); } + + void add_group_cpu(StringRefNull name, uint64_t cpu_start, uint64_t cpu_end) + { + std::scoped_lock lock(_mutex); + + size_t thread_hash = std::hash()(std::this_thread::get_id()); + int thread_id = _thread_ids.lookup_or_add(thread_hash, _thread_ids.size()); + + _report << fmt::format( + ",\n" + R"({{"name":"{}","ph":"X","ts":{},"dur":{},"pid":2,"tid":{}}})", + name.c_str(), + cpu_start / uint64_t(1000), + (cpu_end - cpu_start) / 
uint64_t(1000), + thread_id); + } }; } // namespace blender::gpu diff --git a/source/blender/gpu/metal/mtl_context.hh b/source/blender/gpu/metal/mtl_context.hh index 958ab493459..8b859196e0e 100644 --- a/source/blender/gpu/metal/mtl_context.hh +++ b/source/blender/gpu/metal/mtl_context.hh @@ -33,6 +33,7 @@ #include #include #include +#include #include @class CAMetalLayer; @@ -776,6 +777,23 @@ class MTLContext : public Context { GPUVertFormat dummy_vertformat_[GPU_SAMPLER_TYPE_MAX]; VertBuf *dummy_verts_[GPU_SAMPLER_TYPE_MAX] = {nullptr}; + /* Debug scope timings. Adapted from GLContext::TimeQuery. + * Only supports CPU timings for now. */ + struct ScopeTimings { + using Clock = std::chrono::steady_clock; + using TimePoint = Clock::time_point; + using Nanoseconds = std::chrono::nanoseconds; + + static TimePoint epoch; + + std::string name; + bool finished; + TimePoint cpu_start, cpu_end; + }; + Vector scope_timings; + + void process_frame_timings(); + public: /* GPUContext interface. */ MTLContext(void *ghost_window, void *ghost_context); diff --git a/source/blender/gpu/metal/mtl_context.mm b/source/blender/gpu/metal/mtl_context.mm index e3c9bed7fad..afcf061d4dd 100644 --- a/source/blender/gpu/metal/mtl_context.mm +++ b/source/blender/gpu/metal/mtl_context.mm @@ -374,6 +374,8 @@ MTLContext::~MTLContext() if (this->device) { [this->device release]; } + + this->process_frame_timings(); } void MTLContext::begin_frame() @@ -396,6 +398,8 @@ void MTLContext::end_frame() /* Increment frame counter. 
*/ is_inside_frame_ = false; + + this->process_frame_timings(); } void MTLContext::check_error(const char * /*info*/) diff --git a/source/blender/gpu/metal/mtl_debug.mm b/source/blender/gpu/metal/mtl_debug.mm index c9b929c770d..dc95e9c4518 100644 --- a/source/blender/gpu/metal/mtl_debug.mm +++ b/source/blender/gpu/metal/mtl_debug.mm @@ -23,6 +23,8 @@ #include "CLG_log.h" +#include "gpu_profile_report.hh" + #include namespace blender::gpu::debug { @@ -50,6 +52,17 @@ void MTLContext::debug_group_begin(const char *name, int index) if (G.debug & G_DEBUG_GPU) { this->main_command_buffer.push_debug_group(name, index); } + + if (!G.profile_gpu) { + return; + } + + ScopeTimings timings = {}; + timings.name = name; + timings.finished = false; + timings.cpu_start = ScopeTimings::Clock::now(); + + scope_timings.append(timings); } void MTLContext::debug_group_end() @@ -57,6 +70,56 @@ void MTLContext::debug_group_end() if (G.debug & G_DEBUG_GPU) { this->main_command_buffer.pop_debug_group(); } + + if (!G.profile_gpu) { + return; + } + + for (int i = scope_timings.size() - 1; i >= 0; i--) { + ScopeTimings &query = scope_timings[i]; + if (!query.finished) { + query.finished = true; + query.cpu_end = ScopeTimings::Clock::now(); + break; + } + if (i == 0) { + std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n"; + } + } +} + +MTLContext::ScopeTimings::TimePoint MTLContext::ScopeTimings::epoch = + MTLContext::ScopeTimings::Clock::now(); + +void MTLContext::process_frame_timings() +{ + if (!G.profile_gpu) { + return; + } + + Vector &queries = scope_timings; + + bool frame_is_valid = !queries.is_empty(); + + for (int i = queries.size() - 1; i >= 0; i--) { + if (!queries[i].finished) { + frame_is_valid = false; + std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n"; + } + break; + } + + if (!frame_is_valid) { + return; + } + + for (ScopeTimings &query : queries) { + ScopeTimings::Nanoseconds begin = query.cpu_start - ScopeTimings::epoch; + 
ScopeTimings::Nanoseconds end = query.cpu_end - ScopeTimings::epoch; + ProfileReport::get().add_group_cpu(query.name, begin.count(), end.count()); + } + + queries.clear(); } bool MTLContext::debug_capture_begin(const char * /*title*/) diff --git a/source/creator/creator_args.cc b/source/creator/creator_args.cc index 3cea195ec97..45d4e9ff352 100644 --- a/source/creator/creator_args.cc +++ b/source/creator/creator_args.cc @@ -784,8 +784,8 @@ static void print_help(bArgs *ba, bool all) BLI_args_print_arg_doc(ba, "--gpu-backend"); # ifdef WITH_OPENGL_BACKEND BLI_args_print_arg_doc(ba, "--gpu-compilation-subprocesses"); - BLI_args_print_arg_doc(ba, "--profile-gpu"); # endif + BLI_args_print_arg_doc(ba, "--profile-gpu"); PRINT("\n"); PRINT("Misc Options:\n"); @@ -2512,7 +2512,6 @@ static int arg_handle_addons_set(int argc, const char **argv, void *data) return 0; } -# ifdef WITH_OPENGL_BACKEND static const char arg_handle_profile_gpu_set_doc[] = "\n" "\tEnable CPU & GPU performance profiling for GPU debug groups\n" @@ -2522,7 +2521,6 @@ static int arg_handle_profile_gpu_set(int /*argc*/, const char ** /*argv*/, void G.profile_gpu = true; return 0; } -# endif /** * Implementation for #arg_handle_load_last_file, also used by `--open-last`. @@ -2694,8 +2692,8 @@ void main_args_setup(bContext *C, bArgs *ba, bool all) "--gpu-compilation-subprocesses", CB(arg_handle_gpu_compilation_subprocesses_set), nullptr); - BLI_args_add(ba, nullptr, "--profile-gpu", CB(arg_handle_profile_gpu_set), nullptr); # endif + BLI_args_add(ba, nullptr, "--profile-gpu", CB(arg_handle_profile_gpu_set), nullptr); /* Pass: Background Mode & Settings *