Files
test2/source/blender/gpu/opengl/gl_backend.hh
Clément Foucault 1c47e31367 GPU: Enable GL multithreaded compilation by default
This allows to reduce the waiting time caused by
shader compilation on some GPU-driver combo.

A new settings in the User Preferences make it
possible to override the default amount of worker
threads and optionally use subprocesses.

We still use only one worker thread in cases where
there is no benefit with adding more workers
(like AMD pro driver and Intel windows).

It doesn't scale as much as subprocesses for material
shader compilation but that is for other reasons
explained in #139818.

Add some heuristic to avoid too much memory usage
and / or too many stalls.

Also add some heuristic to the default number of subprocess for
the platform that shows scalling.

Historically, multithreaded compilation was prevented by the
need of context per thread inside `DRWShader` module.
Also there was no good scaling at that time. But
nowadays numbers shows different results with
good scaling with reasonable amount of threads on many
platforms.

Even if we are going for vulkan in the next release
most of the legacy hardware will still use OpenGL for
a few other releases. So it is relevant to make this
easy improvement.

See pull request for measurements.

Pull Request: https://projects.blender.org/blender/blender/pulls/139821
2025-06-09 12:36:06 +02:00

197 lines
4.1 KiB
C++

/* SPDX-FileCopyrightText: 2020 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*/
#pragma once
#include "GPU_capabilities.hh"
#include "GPU_platform.hh"
#include "gpu_backend.hh"
#include "BLI_threads.h"
#include "BLI_vector.hh"
#include "gpu_capabilities_private.hh"
#ifdef WITH_RENDERDOC
# include "renderdoc_api.hh"
#endif
#include "gl_batch.hh"
#include "gl_compilation_subprocess.hh"
#include "gl_compute.hh"
#include "gl_context.hh"
#include "gl_framebuffer.hh"
#include "gl_index_buffer.hh"
#include "gl_query.hh"
#include "gl_shader.hh"
#include "gl_storage_buffer.hh"
#include "gl_texture.hh"
#include "gl_uniform_buffer.hh"
#include "gl_vertex_buffer.hh"
namespace blender {
namespace gpu {
class GLBackend : public GPUBackend {
private:
GLSharedOrphanLists shared_orphan_list_;
#ifdef WITH_RENDERDOC
renderdoc::api::Renderdoc renderdoc_;
#endif
public:
GLBackend()
{
/* platform_init needs to go first. */
GLBackend::platform_init();
GLBackend::capabilities_init();
GLTexture::samplers_init();
}
~GLBackend()
{
GLBackend::platform_exit();
}
void init_resources() override
{
if (GCaps.use_subprocess_shader_compilations) {
compiler_ = MEM_new<GLSubprocessShaderCompiler>(__func__);
}
else {
compiler_ = MEM_new<GLShaderCompiler>(__func__);
}
};
void delete_resources() override
{
/* Delete any resources with context active. */
GLTexture::samplers_free();
MEM_delete(compiler_);
}
static GLBackend *get()
{
return static_cast<GLBackend *>(GPUBackend::get());
}
void samplers_update() override
{
GLTexture::samplers_update();
};
Context *context_alloc(void *ghost_window, void * /*ghost_context*/) override
{
return new GLContext(ghost_window, shared_orphan_list_);
};
Batch *batch_alloc() override
{
return new GLBatch();
};
Fence *fence_alloc() override
{
return new GLFence();
};
FrameBuffer *framebuffer_alloc(const char *name) override
{
return new GLFrameBuffer(name);
};
IndexBuf *indexbuf_alloc() override
{
return new GLIndexBuf();
};
PixelBuffer *pixelbuf_alloc(size_t size) override
{
return new GLPixelBuffer(size);
};
QueryPool *querypool_alloc() override
{
return new GLQueryPool();
};
Shader *shader_alloc(const char *name) override
{
return new GLShader(name);
};
Texture *texture_alloc(const char *name) override
{
return new GLTexture(name);
};
UniformBuf *uniformbuf_alloc(size_t size, const char *name) override
{
return new GLUniformBuf(size, name);
};
StorageBuf *storagebuf_alloc(size_t size, GPUUsageType usage, const char *name) override
{
return new GLStorageBuf(size, usage, name);
};
VertBuf *vertbuf_alloc() override
{
return new GLVertBuf();
};
GLSharedOrphanLists &shared_orphan_list_get()
{
return shared_orphan_list_;
};
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len) override
{
GLContext::get()->state_manager_active_get()->apply_state();
GLCompute::dispatch(groups_x_len, groups_y_len, groups_z_len);
}
void compute_dispatch_indirect(StorageBuf *indirect_buf) override
{
GLContext::get()->state_manager_active_get()->apply_state();
dynamic_cast<GLStorageBuf *>(indirect_buf)->bind_as(GL_DISPATCH_INDIRECT_BUFFER);
/* This barrier needs to be here as it only work on the currently bound indirect buffer. */
glMemoryBarrier(GL_COMMAND_BARRIER_BIT);
glDispatchComputeIndirect((GLintptr)0);
/* Unbind. */
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0);
}
void shader_cache_dir_clear_old() override
{
#if BLI_SUBPROCESS_SUPPORT
GL_shader_cache_dir_clear_old();
#endif
}
/* Render Frame Coordination */
void render_begin() override{};
void render_end() override{};
void render_step(bool /*force_resource_release*/) override{};
bool debug_capture_begin(const char *title);
void debug_capture_end();
private:
static void platform_init();
static void platform_exit();
static void capabilities_init();
};
} // namespace gpu
} // namespace blender