Merge branch 'blender-v4.5-release'
This commit is contained in:
@@ -98,7 +98,7 @@ struct GHOST_Frame {
|
||||
* Vulkan backend. Notably, VKThreadData::resource_pools_count must
|
||||
* match this value.
|
||||
*/
|
||||
constexpr static uint32_t GHOST_FRAMES_IN_FLIGHT = 4;
|
||||
constexpr static uint32_t GHOST_FRAMES_IN_FLIGHT = 5;
|
||||
|
||||
class GHOST_ContextVK : public GHOST_Context {
|
||||
friend class GHOST_XrGraphicsBindingVulkan;
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
|
||||
#include "gpu_backend.hh"
|
||||
#include "gpu_context_private.hh"
|
||||
#include "gpu_profile_report.hh"
|
||||
#include "gpu_shader_create_info.hh"
|
||||
#include "gpu_shader_create_info_private.hh"
|
||||
#include "gpu_shader_dependency_private.hh"
|
||||
@@ -805,14 +806,22 @@ void Shader::set_framebuffer_srgb_target(int use_srgb_to_linear)
|
||||
|
||||
Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_batch_compilation)
|
||||
{
|
||||
using Clock = std::chrono::steady_clock;
|
||||
using TimePoint = Clock::time_point;
|
||||
|
||||
using namespace blender::gpu::shader;
|
||||
const_cast<ShaderCreateInfo &>(info).finalize();
|
||||
|
||||
TimePoint start_time;
|
||||
|
||||
if (Context::get()) {
|
||||
/* Context can be null in Vulkan compilation threads. */
|
||||
GPU_debug_group_begin(GPU_DEBUG_SHADER_COMPILATION_GROUP);
|
||||
GPU_debug_group_begin(info.name_.c_str());
|
||||
}
|
||||
else {
|
||||
start_time = Clock::now();
|
||||
}
|
||||
|
||||
const std::string error = info.check_error();
|
||||
if (!error.empty()) {
|
||||
@@ -936,6 +945,16 @@ Shader *ShaderCompiler::compile(const shader::ShaderCreateInfo &info, bool is_ba
|
||||
GPU_debug_group_end();
|
||||
GPU_debug_group_end();
|
||||
}
|
||||
else {
|
||||
TimePoint end_time = Clock::now();
|
||||
/* Note: Used by the vulkan backend. Use the same time_since_epoch as process_frame_timings. */
|
||||
ProfileReport::get().add_group_cpu(GPU_DEBUG_SHADER_COMPILATION_GROUP,
|
||||
start_time.time_since_epoch().count(),
|
||||
end_time.time_since_epoch().count());
|
||||
ProfileReport::get().add_group_cpu(info.name_.c_str(),
|
||||
start_time.time_since_epoch().count(),
|
||||
end_time.time_since_epoch().count());
|
||||
}
|
||||
|
||||
return shader;
|
||||
}
|
||||
|
||||
@@ -83,7 +83,7 @@ void MTLContext::debug_group_end()
|
||||
break;
|
||||
}
|
||||
if (i == 0) {
|
||||
std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n";
|
||||
CLOG_ERROR(&debug::LOG, "Profile GPU error: Extra GPU_debug_group_end() call.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -104,7 +104,7 @@ void MTLContext::process_frame_timings()
|
||||
for (int i = queries.size() - 1; i >= 0; i--) {
|
||||
if (!queries[i].finished) {
|
||||
frame_is_valid = false;
|
||||
std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n";
|
||||
CLOG_ERROR(&debug::LOG, "Profile GPU error: Missing GPU_debug_group_end() call\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -441,7 +441,7 @@ void GLContext::debug_group_end()
|
||||
break;
|
||||
}
|
||||
if (i == 0) {
|
||||
std::cout << "Profile GPU error: Extra GPU_debug_group_end() call.\n";
|
||||
CLOG_ERROR(&LOG, "Profile GPU error: Extra GPU_debug_group_end() call.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -461,7 +461,7 @@ void GLContext::process_frame_timings()
|
||||
for (int i = queries.size() - 1; i >= 0; i--) {
|
||||
if (!queries[i].finished) {
|
||||
frame_is_valid = false;
|
||||
std::cout << "Profile GPU error: Missing GPU_debug_group_end() call\n";
|
||||
CLOG_ERROR(&LOG, "Profile GPU error: Missing GPU_debug_group_end() call\n");
|
||||
}
|
||||
else {
|
||||
glGetQueryObjectiv(queries.last().handle_end, GL_QUERY_RESULT_AVAILABLE, &frame_is_ready);
|
||||
|
||||
@@ -77,7 +77,10 @@ void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat
|
||||
|
||||
/* NOTE: This is not threadsafe with regards to the specialization constants state access.
|
||||
* The shader creation must be externally synchronized. */
|
||||
main_program_ = &program_cache_.lookup_or_add_default(constants->values);
|
||||
main_program_ = program_cache_
|
||||
.lookup_or_add_cb(constants->values,
|
||||
[]() { return std::make_unique<GLProgram>(); })
|
||||
.get();
|
||||
if (!main_program_->program_id) {
|
||||
main_program_->program_id = glCreateProgram();
|
||||
debug::object_label(GL_PROGRAM, main_program_->program_id, name);
|
||||
@@ -86,7 +89,10 @@ void GLShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat
|
||||
|
||||
void GLShader::init()
|
||||
{
|
||||
main_program_ = &program_cache_.lookup_or_add_default(constants->values);
|
||||
main_program_ = program_cache_
|
||||
.lookup_or_add_cb(constants->values,
|
||||
[]() { return std::make_unique<GLProgram>(); })
|
||||
.get();
|
||||
if (!main_program_->program_id) {
|
||||
main_program_->program_id = glCreateProgram();
|
||||
debug::object_label(GL_PROGRAM, main_program_->program_id, name);
|
||||
@@ -1612,7 +1618,8 @@ GLShader::GLProgram &GLShader::program_get(const shader::SpecializationConstants
|
||||
|
||||
program_cache_mutex_.lock();
|
||||
|
||||
GLProgram &program = program_cache_.lookup_or_add_default(constants_state->values);
|
||||
GLProgram &program = *program_cache_.lookup_or_add_cb(
|
||||
constants_state->values, []() { return std::make_unique<GLProgram>(); });
|
||||
|
||||
program_cache_mutex_.unlock();
|
||||
|
||||
@@ -1881,7 +1888,7 @@ Shader *GLSubprocessShaderCompiler::compile_shader(const shader::ShaderCreateInf
|
||||
const shader::SpecializationConstants &constants = GPU_shader_get_default_constant_state(
|
||||
wrap(shader));
|
||||
|
||||
if (!worker->load_program_binary(shader->program_cache_.lookup(constants.values).program_id) ||
|
||||
if (!worker->load_program_binary(shader->program_cache_.lookup(constants.values)->program_id) ||
|
||||
!shader->post_finalize(&info))
|
||||
{
|
||||
/* Compilation failed, try to compile it locally. */
|
||||
@@ -1908,7 +1915,7 @@ void GLSubprocessShaderCompiler::specialize_shader(ShaderSpecialization &special
|
||||
|
||||
auto program_get = [&]() -> GLShader::GLProgram * {
|
||||
if (shader->program_cache_.contains(specialization.constants.values)) {
|
||||
return &shader->program_cache_.lookup(specialization.constants.values);
|
||||
return shader->program_cache_.lookup(specialization.constants.values).get();
|
||||
}
|
||||
return nullptr;
|
||||
};
|
||||
|
||||
@@ -85,19 +85,6 @@ class GLShader : public Shader {
|
||||
std::mutex compilation_mutex;
|
||||
|
||||
GLProgram() {}
|
||||
GLProgram(GLProgram &&other)
|
||||
{
|
||||
program_id = other.program_id;
|
||||
vert_shader = other.vert_shader;
|
||||
geom_shader = other.geom_shader;
|
||||
frag_shader = other.frag_shader;
|
||||
compute_shader = other.compute_shader;
|
||||
other.program_id = 0;
|
||||
other.vert_shader = 0;
|
||||
other.geom_shader = 0;
|
||||
other.frag_shader = 0;
|
||||
other.compute_shader = 0;
|
||||
}
|
||||
~GLProgram();
|
||||
|
||||
void program_link(StringRefNull shader_name);
|
||||
@@ -105,7 +92,7 @@ class GLShader : public Shader {
|
||||
|
||||
using GLProgramCacheKey = Vector<shader::SpecializationConstant::Value>;
|
||||
/** Contains all specialized shader variants. */
|
||||
Map<GLProgramCacheKey, GLProgram> program_cache_;
|
||||
Map<GLProgramCacheKey, std::unique_ptr<GLProgram>> program_cache_;
|
||||
|
||||
std::mutex program_cache_mutex_;
|
||||
|
||||
|
||||
@@ -45,6 +45,8 @@ VKContext::~VKContext()
|
||||
free_resources();
|
||||
VKBackend::get().device.context_unregister(*this);
|
||||
|
||||
this->process_frame_timings();
|
||||
|
||||
imm = nullptr;
|
||||
}
|
||||
|
||||
@@ -108,8 +110,13 @@ void VKContext::activate()
|
||||
if (!render_graph_.has_value()) {
|
||||
render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
|
||||
*device.render_graph_new());
|
||||
/* Recreate the debug group stack for the new graph.
|
||||
* Note: there is no associated `debug_group_end` as the graph groups
|
||||
* are implicitly closed on submission. */
|
||||
for (const StringRef &group : debug_stack) {
|
||||
debug_group_begin(std::string(group).c_str(), 0);
|
||||
std::string str_group = group;
|
||||
render_graph_.value().get().debug_group_begin(str_group.c_str(),
|
||||
debug::get_debug_group_color(str_group));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -138,6 +145,7 @@ void VKContext::end_frame()
|
||||
{
|
||||
VKDevice &device = VKBackend::get().device;
|
||||
device.orphaned_data.destroy_discarded_resources(device);
|
||||
this->process_frame_timings();
|
||||
}
|
||||
|
||||
void VKContext::flush()
|
||||
@@ -175,8 +183,13 @@ TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags,
|
||||
if (bool(flags & RenderGraphFlushFlags::RENEW_RENDER_GRAPH)) {
|
||||
render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
|
||||
*device.render_graph_new());
|
||||
/* Recreate the debug group stack for the new graph.
|
||||
* Note: there is no associated `debug_group_end` as the graph groups
|
||||
* are implicitly closed on submission. */
|
||||
for (const StringRef &group : debug_stack) {
|
||||
debug_group_begin(std::string(group).c_str(), 0);
|
||||
std::string str_group = group;
|
||||
render_graph_.value().get().debug_group_begin(str_group.c_str(),
|
||||
debug::get_debug_group_color(str_group));
|
||||
}
|
||||
}
|
||||
return timeline;
|
||||
|
||||
@@ -55,6 +55,21 @@ class VKContext : public Context, NonCopyable {
|
||||
/* Active shader specialization constants state. */
|
||||
shader::SpecializationConstants constants_state_;
|
||||
|
||||
/* Debug scope timings. Adapted form GLContext::TimeQuery.
|
||||
* Only supports CPU timings for now. */
|
||||
struct ScopeTimings {
|
||||
using Clock = std::chrono::steady_clock;
|
||||
using TimePoint = Clock::time_point;
|
||||
using Nanoseconds = std::chrono::nanoseconds;
|
||||
|
||||
std::string name;
|
||||
bool finished;
|
||||
TimePoint cpu_start, cpu_end;
|
||||
};
|
||||
Vector<ScopeTimings> scope_timings;
|
||||
|
||||
void process_frame_timings();
|
||||
|
||||
public:
|
||||
VKDiscardPool discard_pool;
|
||||
|
||||
|
||||
@@ -16,17 +16,77 @@
|
||||
#include "vk_debug.hh"
|
||||
#include "vk_to_string.hh"
|
||||
|
||||
#include "gpu_profile_report.hh"
|
||||
|
||||
static CLG_LogRef LOG = {"gpu.vulkan"};
|
||||
|
||||
namespace blender::gpu {
|
||||
void VKContext::debug_group_begin(const char *name, int)
|
||||
{
|
||||
render_graph().debug_group_begin(name, debug::get_debug_group_color(name));
|
||||
|
||||
if (!G.profile_gpu) {
|
||||
return;
|
||||
}
|
||||
|
||||
ScopeTimings timings = {};
|
||||
timings.name = name;
|
||||
timings.finished = false;
|
||||
timings.cpu_start = ScopeTimings::Clock::now();
|
||||
|
||||
scope_timings.append(timings);
|
||||
}
|
||||
|
||||
void VKContext::debug_group_end()
|
||||
{
|
||||
render_graph().debug_group_end();
|
||||
|
||||
if (!G.profile_gpu) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = scope_timings.size() - 1; i >= 0; i--) {
|
||||
ScopeTimings &query = scope_timings[i];
|
||||
if (!query.finished) {
|
||||
query.finished = true;
|
||||
query.cpu_end = ScopeTimings::Clock::now();
|
||||
break;
|
||||
}
|
||||
if (i == 0) {
|
||||
CLOG_ERROR(&LOG, "Profile GPU error: Extra GPU_debug_group_end() call.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VKContext::process_frame_timings()
|
||||
{
|
||||
if (!G.profile_gpu) {
|
||||
return;
|
||||
}
|
||||
|
||||
Vector<ScopeTimings> &queries = scope_timings;
|
||||
|
||||
bool frame_is_valid = !queries.is_empty();
|
||||
|
||||
for (int i = queries.size() - 1; i >= 0; i--) {
|
||||
if (!queries[i].finished) {
|
||||
frame_is_valid = false;
|
||||
CLOG_ERROR(&LOG, "Profile GPU error: Missing GPU_debug_group_end() call\n");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (!frame_is_valid) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (ScopeTimings &query : queries) {
|
||||
ProfileReport::get().add_group_cpu(query.name,
|
||||
query.cpu_start.time_since_epoch().count(),
|
||||
query.cpu_end.time_since_epoch().count());
|
||||
}
|
||||
|
||||
queries.clear();
|
||||
}
|
||||
|
||||
bool VKContext::debug_capture_begin(const char *title)
|
||||
|
||||
@@ -102,7 +102,7 @@ class VKThreadData : public NonCopyable, NonMovable {
|
||||
* in flight used by GHOST. Therefore, this constant *must* always
|
||||
* match GHOST_ContextVK's GHOST_FRAMES_IN_FLIGHT.
|
||||
*/
|
||||
static constexpr uint32_t resource_pools_count = 4;
|
||||
static constexpr uint32_t resource_pools_count = 5;
|
||||
|
||||
public:
|
||||
/** Thread ID this instance belongs to. */
|
||||
|
||||
Reference in New Issue
Block a user