GPU: Metal: Add --profile-gpu support for CPU timing
The GPU-side timing implementation is a bit too complex to add for now. As we are improving shader loading, having the CPU timings is already helpful. Note that `Map<size_t, int>` does not compile on Clang. This exposes the `--profile-gpu` option on all backends, as the Vulkan backend should follow shortly. Pull Request: https://projects.blender.org/blender/blender/pulls/139551
This commit is contained in:
committed by
Clément Foucault
parent
5106c4e655
commit
7245262de8
@@ -2,6 +2,8 @@
|
||||
*
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_map.hh"
|
||||
#include "BLI_mutex.hh"
|
||||
#include "BLI_string_ref.hh"
|
||||
@@ -19,7 +21,7 @@ class ProfileReport {
|
||||
private:
|
||||
std::fstream _report;
|
||||
Mutex _mutex;
|
||||
Map<size_t, int> _thread_ids;
|
||||
Map<uint64_t, int> _thread_ids;
|
||||
|
||||
ProfileReport()
|
||||
{
|
||||
@@ -69,6 +71,22 @@ class ProfileReport {
|
||||
(cpu_end - cpu_start) / uint64_t(1000),
|
||||
thread_id);
|
||||
}
|
||||
|
||||
/**
 * Append one CPU-only timing event for a debug group to the report stream.
 *
 * The event is written as a JSON object with phase "X" and process id 2.
 * `cpu_start` and `cpu_end` are divided by 1000 before being written, so
 * nanosecond inputs end up as microsecond `ts` / `dur` values.
 *
 * \param name: Label stored in the event's "name" field (written as-is, not escaped).
 * \param cpu_start: Start time of the scope.
 * \param cpu_end: End time of the scope, in the same unit as `cpu_start`.
 */
void add_group_cpu(StringRefNull name, uint64_t cpu_start, uint64_t cpu_end)
{
  /* Serialize access to both the thread-id map and the output stream. */
  std::scoped_lock lock(_mutex);

  /* Map the calling thread to a small integer id. A thread seen for the first
   * time is registered with the current number of known threads as its id. */
  const std::thread::id current_thread = std::this_thread::get_id();
  size_t thread_key = std::hash<std::thread::id>()(current_thread);
  const int tid = _thread_ids.lookup_or_add(thread_key, _thread_ids.size());

  const uint64_t start_us = cpu_start / uint64_t(1000);
  const uint64_t duration_us = (cpu_end - cpu_start) / uint64_t(1000);

  _report << fmt::format(
      ",\n"
      R"({{"name":"{}","ph":"X","ts":{},"dur":{},"pid":2,"tid":{}}})",
      name.c_str(),
      start_us,
      duration_us,
      tid);
}
|
||||
};
|
||||
|
||||
} // namespace blender::gpu
|
||||
|
||||
@@ -33,6 +33,7 @@
|
||||
#include <Cocoa/Cocoa.h>
|
||||
#include <Metal/Metal.h>
|
||||
#include <QuartzCore/QuartzCore.h>
|
||||
#include <chrono>
|
||||
#include <mutex>
|
||||
|
||||
@class CAMetalLayer;
|
||||
@@ -776,6 +777,23 @@ class MTLContext : public Context {
|
||||
GPUVertFormat dummy_vertformat_[GPU_SAMPLER_TYPE_MAX];
|
||||
VertBuf *dummy_verts_[GPU_SAMPLER_TYPE_MAX] = {nullptr};
|
||||
|
||||
/* Debug scope timings. Adapted from GLContext::TimeQuery.
|
||||
* Only supports CPU timings for now. */
|
||||
/* CPU timing record for a single debug scope. */
struct ScopeTimings {
  using Clock = std::chrono::steady_clock;
  using TimePoint = Clock::time_point;
  using Nanoseconds = std::chrono::nanoseconds;

  /* Shared reference time point that recorded times are measured against. */
  static TimePoint epoch;

  /* Label of the debug scope. */
  std::string name;
  /* True once the scope has been closed and `cpu_end` is valid.
   * Initialized explicitly so a default-constructed record is never read with
   * an indeterminate flag. */
  bool finished = false;
  /* CPU time at which the scope was opened. */
  TimePoint cpu_start;
  /* CPU time at which the scope was closed. Only meaningful when `finished` is true. */
  TimePoint cpu_end;
};
|
||||
Vector<ScopeTimings> scope_timings;
|
||||
|
||||
void process_frame_timings();
|
||||
|
||||
public:
|
||||
/* GPUContext interface. */
|
||||
MTLContext(void *ghost_window, void *ghost_context);
|
||||
|
||||
@@ -374,6 +374,8 @@ MTLContext::~MTLContext()
|
||||
if (this->device) {
|
||||
[this->device release];
|
||||
}
|
||||
|
||||
this->process_frame_timings();
|
||||
}
|
||||
|
||||
void MTLContext::begin_frame()
|
||||
@@ -396,6 +398,8 @@ void MTLContext::end_frame()
|
||||
|
||||
/* Increment frame counter. */
|
||||
is_inside_frame_ = false;
|
||||
|
||||
this->process_frame_timings();
|
||||
}
|
||||
|
||||
void MTLContext::check_error(const char * /*info*/)
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
|
||||
#include "CLG_log.h"
|
||||
|
||||
#include "gpu_profile_report.hh"
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace blender::gpu::debug {
|
||||
@@ -50,6 +52,17 @@ void MTLContext::debug_group_begin(const char *name, int index)
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
this->main_command_buffer.push_debug_group(name, index);
|
||||
}
|
||||
|
||||
if (!G.profile_gpu) {
|
||||
return;
|
||||
}
|
||||
|
||||
ScopeTimings timings = {};
|
||||
timings.name = name;
|
||||
timings.finished = false;
|
||||
timings.cpu_start = ScopeTimings::Clock::now();
|
||||
|
||||
scope_timings.append(timings);
|
||||
}
|
||||
|
||||
void MTLContext::debug_group_end()
|
||||
@@ -57,6 +70,56 @@ void MTLContext::debug_group_end()
|
||||
if (G.debug & G_DEBUG_GPU) {
|
||||
this->main_command_buffer.pop_debug_group();
|
||||
}
|
||||
|
||||
if (!G.profile_gpu) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = scope_timings.size() - 1; i >= 0; i--) {
|
||||
ScopeTimings &query = scope_timings[i];
|
||||
if (!query.finished) {
|
||||
query.finished = true;
|
||||
query.cpu_end = ScopeTimings::Clock::now();
|
||||
break;
|
||||
}
|
||||
if (i == 0) {
|
||||
std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Reference time point, captured once when this static member is initialized.
 * `process_frame_timings()` subtracts it from each scope's start and end time
 * so the reported values are offsets from this point. */
MTLContext::ScopeTimings::TimePoint MTLContext::ScopeTimings::epoch =
|
||||
MTLContext::ScopeTimings::Clock::now();
|
||||
|
||||
void MTLContext::process_frame_timings()
|
||||
{
|
||||
if (!G.profile_gpu) {
|
||||
return;
|
||||
}
|
||||
|
||||
Vector<ScopeTimings> &queries = scope_timings;
|
||||
|
||||
bool frame_is_valid = !queries.is_empty();
|
||||
|
||||
for (int i = queries.size() - 1; i >= 0; i--) {
|
||||
if (!queries[i].finished) {
|
||||
frame_is_valid = false;
|
||||
std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n";
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (!frame_is_valid) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (ScopeTimings &query : queries) {
|
||||
ScopeTimings::Nanoseconds begin = query.cpu_start - ScopeTimings::epoch;
|
||||
ScopeTimings::Nanoseconds end = query.cpu_end - ScopeTimings::epoch;
|
||||
ProfileReport::get().add_group_cpu(query.name, begin.count(), end.count());
|
||||
}
|
||||
|
||||
queries.clear();
|
||||
}
|
||||
|
||||
bool MTLContext::debug_capture_begin(const char * /*title*/)
|
||||
|
||||
@@ -784,8 +784,8 @@ static void print_help(bArgs *ba, bool all)
|
||||
BLI_args_print_arg_doc(ba, "--gpu-backend");
|
||||
# ifdef WITH_OPENGL_BACKEND
|
||||
BLI_args_print_arg_doc(ba, "--gpu-compilation-subprocesses");
|
||||
BLI_args_print_arg_doc(ba, "--profile-gpu");
|
||||
# endif
|
||||
BLI_args_print_arg_doc(ba, "--profile-gpu");
|
||||
|
||||
PRINT("\n");
|
||||
PRINT("Misc Options:\n");
|
||||
@@ -2512,7 +2512,6 @@ static int arg_handle_addons_set(int argc, const char **argv, void *data)
|
||||
return 0;
|
||||
}
|
||||
|
||||
# ifdef WITH_OPENGL_BACKEND
|
||||
static const char arg_handle_profile_gpu_set_doc[] =
|
||||
"\n"
|
||||
"\tEnable CPU & GPU performance profiling for GPU debug groups\n"
|
||||
@@ -2522,7 +2521,6 @@ static int arg_handle_profile_gpu_set(int /*argc*/, const char ** /*argv*/, void
|
||||
G.profile_gpu = true;
|
||||
return 0;
|
||||
}
|
||||
# endif
|
||||
|
||||
/**
|
||||
* Implementation for #arg_handle_load_last_file, also used by `--open-last`.
|
||||
@@ -2694,8 +2692,8 @@ void main_args_setup(bContext *C, bArgs *ba, bool all)
|
||||
"--gpu-compilation-subprocesses",
|
||||
CB(arg_handle_gpu_compilation_subprocesses_set),
|
||||
nullptr);
|
||||
BLI_args_add(ba, nullptr, "--profile-gpu", CB(arg_handle_profile_gpu_set), nullptr);
|
||||
# endif
|
||||
BLI_args_add(ba, nullptr, "--profile-gpu", CB(arg_handle_profile_gpu_set), nullptr);
|
||||
|
||||
/* Pass: Background Mode & Settings
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user