GPU: Metal: Add --profile-gpu support for CPU timing

The GPU implementation is a bit too complex
to implement for now.

As we are improving shader loading, having the
CPU timings is already helpful.

Note that `Map<size_t, int>` does not compile
on Clang.

This is exposing the `--profile-gpu` option on
all backends as the vulkan backend should follow
shortly.

Pull Request: https://projects.blender.org/blender/blender/pulls/139551
This commit is contained in:
Clément Foucault
2025-05-28 21:08:38 +02:00
committed by Clément Foucault
parent 5106c4e655
commit 7245262de8
5 changed files with 106 additions and 5 deletions

View File

@@ -2,6 +2,8 @@
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "BLI_map.hh"
#include "BLI_mutex.hh"
#include "BLI_string_ref.hh"
@@ -19,7 +21,7 @@ class ProfileReport {
private:
std::fstream _report;
Mutex _mutex;
Map<size_t, int> _thread_ids;
Map<uint64_t, int> _thread_ids;
ProfileReport()
{
@@ -69,6 +71,22 @@ class ProfileReport {
(cpu_end - cpu_start) / uint64_t(1000),
thread_id);
}
/**
 * Append one complete ("ph":"X") event describing a CPU-timed scope to the report stream.
 *
 * \param name: Label written as the event name.
 * \param cpu_start: Scope start time. Divided by 1000 before being written as "ts".
 * \param cpu_end: Scope end time. The difference to `cpu_start` is divided by 1000 and
 * written as "dur".
 *
 * NOTE(review): callers appear to pass nanoseconds, which would make the written values
 * microseconds -- confirm against the call sites.
 */
void add_group_cpu(StringRefNull name, uint64_t cpu_start, uint64_t cpu_end)
{
  /* Held for the whole function: both the thread-id map and the stream are accessed below. */
  std::scoped_lock lock(_mutex);

  /* Translate the calling thread into a small integer id. The candidate id for a thread that
   * is not yet known is the current number of entries in the map. */
  const size_t hash_of_thread = std::hash<std::thread::id>()(std::this_thread::get_id());
  const int tid = _thread_ids.lookup_or_add(hash_of_thread, _thread_ids.size());

  const uint64_t timestamp = cpu_start / uint64_t(1000);
  const uint64_t duration = (cpu_end - cpu_start) / uint64_t(1000);

  _report << fmt::format(
      ",\n"
      R"({{"name":"{}","ph":"X","ts":{},"dur":{},"pid":2,"tid":{}}})",
      name.c_str(),
      timestamp,
      duration,
      tid);
}
};
} // namespace blender::gpu

View File

@@ -33,6 +33,7 @@
#include <Cocoa/Cocoa.h>
#include <Metal/Metal.h>
#include <QuartzCore/QuartzCore.h>
#include <chrono>
#include <mutex>
@class CAMetalLayer;
@@ -776,6 +777,23 @@ class MTLContext : public Context {
GPUVertFormat dummy_vertformat_[GPU_SAMPLER_TYPE_MAX];
VertBuf *dummy_verts_[GPU_SAMPLER_TYPE_MAX] = {nullptr};
/* Debug scope timings. Adapted from GLContext::TimeQuery.
 * Only supports CPU timings for now. */
struct ScopeTimings {
  using Clock = std::chrono::steady_clock;
  using TimePoint = Clock::time_point;
  using Nanoseconds = std::chrono::nanoseconds;

  /* Common reference time point shared by every scope. */
  static TimePoint epoch;

  /* Label of the scope. */
  std::string name;
  /* True once the end of the scope has been recorded. Explicitly initialized so that a scope
   * created without `= {}` never exposes an indeterminate value. */
  bool finished = false;
  /* CPU time points taken at the beginning and at the end of the scope. */
  TimePoint cpu_start, cpu_end;
};
Vector<ScopeTimings> scope_timings;
void process_frame_timings();
public:
/* GPUContext interface. */
MTLContext(void *ghost_window, void *ghost_context);

View File

@@ -374,6 +374,8 @@ MTLContext::~MTLContext()
if (this->device) {
[this->device release];
}
this->process_frame_timings();
}
void MTLContext::begin_frame()
@@ -396,6 +398,8 @@ void MTLContext::end_frame()
/* Increment frame counter. */
is_inside_frame_ = false;
this->process_frame_timings();
}
void MTLContext::check_error(const char * /*info*/)

View File

@@ -23,6 +23,8 @@
#include "CLG_log.h"
#include "gpu_profile_report.hh"
#include <utility>
namespace blender::gpu::debug {
@@ -50,6 +52,17 @@ void MTLContext::debug_group_begin(const char *name, int index)
if (G.debug & G_DEBUG_GPU) {
this->main_command_buffer.push_debug_group(name, index);
}
if (!G.profile_gpu) {
return;
}
ScopeTimings timings = {};
timings.name = name;
timings.finished = false;
timings.cpu_start = ScopeTimings::Clock::now();
scope_timings.append(timings);
}
void MTLContext::debug_group_end()
@@ -57,6 +70,56 @@ void MTLContext::debug_group_end()
if (G.debug & G_DEBUG_GPU) {
this->main_command_buffer.pop_debug_group();
}
if (!G.profile_gpu) {
return;
}
for (int i = scope_timings.size() - 1; i >= 0; i--) {
ScopeTimings &query = scope_timings[i];
if (!query.finished) {
query.finished = true;
query.cpu_end = ScopeTimings::Clock::now();
break;
}
if (i == 0) {
std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n";
}
}
}
/* Definition of the static reference time point, set from the clock when this static is
 * initialized. Scope start/end time points are subtracted from it to obtain the offsets that
 * are written to the profile report. */
MTLContext::ScopeTimings::TimePoint MTLContext::ScopeTimings::epoch =
MTLContext::ScopeTimings::Clock::now();
void MTLContext::process_frame_timings()
{
if (!G.profile_gpu) {
return;
}
Vector<ScopeTimings> &queries = scope_timings;
bool frame_is_valid = !queries.is_empty();
for (int i = queries.size() - 1; i >= 0; i--) {
if (!queries[i].finished) {
frame_is_valid = false;
std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n";
}
break;
}
if (!frame_is_valid) {
return;
}
for (ScopeTimings &query : queries) {
ScopeTimings::Nanoseconds begin = query.cpu_start - ScopeTimings::epoch;
ScopeTimings::Nanoseconds end = query.cpu_end - ScopeTimings::epoch;
ProfileReport::get().add_group_cpu(query.name, begin.count(), end.count());
}
queries.clear();
}
bool MTLContext::debug_capture_begin(const char * /*title*/)

View File

@@ -784,8 +784,8 @@ static void print_help(bArgs *ba, bool all)
BLI_args_print_arg_doc(ba, "--gpu-backend");
# ifdef WITH_OPENGL_BACKEND
BLI_args_print_arg_doc(ba, "--gpu-compilation-subprocesses");
BLI_args_print_arg_doc(ba, "--profile-gpu");
# endif
BLI_args_print_arg_doc(ba, "--profile-gpu");
PRINT("\n");
PRINT("Misc Options:\n");
@@ -2512,7 +2512,6 @@ static int arg_handle_addons_set(int argc, const char **argv, void *data)
return 0;
}
# ifdef WITH_OPENGL_BACKEND
static const char arg_handle_profile_gpu_set_doc[] =
"\n"
"\tEnable CPU & GPU performance profiling for GPU debug groups\n"
@@ -2522,7 +2521,6 @@ static int arg_handle_profile_gpu_set(int /*argc*/, const char ** /*argv*/, void
G.profile_gpu = true;
return 0;
}
# endif
/**
* Implementation for #arg_handle_load_last_file, also used by `--open-last`.
@@ -2694,8 +2692,8 @@ void main_args_setup(bContext *C, bArgs *ba, bool all)
"--gpu-compilation-subprocesses",
CB(arg_handle_gpu_compilation_subprocesses_set),
nullptr);
BLI_args_add(ba, nullptr, "--profile-gpu", CB(arg_handle_profile_gpu_set), nullptr);
# endif
BLI_args_add(ba, nullptr, "--profile-gpu", CB(arg_handle_profile_gpu_set), nullptr);
/* Pass: Background Mode & Settings
*