GPU: Add Image Load Store extension support

This wraps the functionality used to speed up EEVEE volumetrics.

This touches the rendering code of EEVEE as it should fix a misuse of
the GL barrier. The barrier changed type and location, and an unused
barrier was removed.
This commit is contained in:
Clément Foucault
2020-09-12 06:10:11 +02:00
parent a442da62dc
commit 136bdb561b
22 changed files with 278 additions and 34 deletions

View File

@@ -84,9 +84,7 @@ extern struct DrawEngineType draw_engine_eevee_type;
#define EEVEE_PROBE_MAX min_ii(MAX_PROBE, GPU_max_texture_layers() / 6)
#define EEVEE_VELOCITY_TILE_SIZE 32
#define USE_VOLUME_OPTI \
(GLEW_ARB_shader_image_load_store && GLEW_ARB_shading_language_420pack && \
!GPU_crappy_amd_driver())
#define USE_VOLUME_OPTI (GPU_shader_image_load_store_support())
#define SWAP_DOUBLE_BUFFERS() \
{ \

View File

@@ -601,6 +601,10 @@ void EEVEE_volumes_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo);
DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo);
DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined);
if (USE_VOLUME_OPTI) {
DRW_shgroup_uniform_image_ref(grp, "finalScattering_img", &txl->volume_scatter_history);
DRW_shgroup_uniform_image_ref(grp, "finalTransmittance_img", &txl->volume_transmit_history);
}
DRW_shgroup_call_procedural_triangles(
grp, NULL, USE_VOLUME_OPTI ? 1 : common_data->vol_tex_size[2]);
@@ -610,6 +614,7 @@ void EEVEE_volumes_cache_finish(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_shgroup_uniform_texture_ref(grp, "inScattering", &txl->volume_scatter);
DRW_shgroup_uniform_texture_ref(grp, "inTransmittance", &txl->volume_transmit);
DRW_shgroup_uniform_texture_ref(grp, "inSceneDepth", &e_data.depth_src);
DRW_shgroup_uniform_block(grp, "light_block", sldata->light_ubo);
DRW_shgroup_uniform_block(grp, "common_block", sldata->common_ubo);
DRW_shgroup_uniform_block(grp, "probe_block", sldata->probe_ubo);
DRW_shgroup_uniform_block(grp, "renderpass_block", sldata->renderpass_ubo.combined);
@@ -714,15 +719,7 @@ void EEVEE_volumes_compute(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_draw_pass(psl->volumetric_scatter_ps);
if (USE_VOLUME_OPTI) {
int tex_scatter = GPU_texture_opengl_bindcode(txl->volume_scatter_history);
int tex_transmit = GPU_texture_opengl_bindcode(txl->volume_transmit_history);
/* TODO(fclem) Encapsulate these GL calls into DRWManager. */
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
/* Subtlety here! we need to tell the GL that the texture is layered (GL_TRUE)
* in order to bind the full 3D texture and not just a 2D slice. */
glBindImageTexture(0, tex_scatter, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R11F_G11F_B10F);
glBindImageTexture(1, tex_transmit, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R11F_G11F_B10F);
/* Avoid feedback loop assert. */
GPU_framebuffer_bind(fbl->volumetric_fb);
}
else {
@@ -731,13 +728,6 @@ void EEVEE_volumes_compute(EEVEE_ViewLayerData *sldata, EEVEE_Data *vedata)
DRW_draw_pass(psl->volumetric_integration_ps);
if (USE_VOLUME_OPTI) {
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
glBindImageTexture(0, 0, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R11F_G11F_B10F);
glBindImageTexture(1, 0, 0, GL_TRUE, 0, GL_WRITE_ONLY, GL_R11F_G11F_B10F);
}
SWAP(struct GPUFrameBuffer *, fbl->volumetric_scat_fb, fbl->volumetric_integ_fb);
SWAP(GPUTexture *, txl->volume_scatter, txl->volume_scatter_history);
SWAP(GPUTexture *, txl->volume_transmit, txl->volume_transmit_history);
@@ -763,6 +753,10 @@ void EEVEE_volumes_resolve(EEVEE_ViewLayerData *UNUSED(sldata), EEVEE_Data *veda
DefaultTextureList *dtxl = DRW_viewport_texture_list_get();
e_data.depth_src = dtxl->depth;
if (USE_VOLUME_OPTI) {
GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
}
/* Apply for opaque geometry. */
GPU_framebuffer_bind(fbl->main_color_fb);
DRW_draw_pass(psl->volumetric_resolve_ps);

View File

@@ -11,8 +11,8 @@ uniform sampler3D volumeScattering; /* Result of the scatter step */
uniform sampler3D volumeExtinction;
#ifdef USE_VOLUME_OPTI
uniform layout(binding = 0, r11f_g11f_b10f) writeonly restrict image3D finalScattering_img;
uniform layout(binding = 1, r11f_g11f_b10f) writeonly restrict image3D finalTransmittance_img;
uniform layout(r11f_g11f_b10f) writeonly restrict image3D finalScattering_img;
uniform layout(r11f_g11f_b10f) writeonly restrict image3D finalTransmittance_img;
vec3 finalScattering;
vec3 finalTransmittance;

View File

@@ -557,6 +557,9 @@ void DRW_shgroup_uniform_ivec4(DRWShadingGroup *shgroup,
int arraysize);
void DRW_shgroup_uniform_mat3(DRWShadingGroup *shgroup, const char *name, const float (*value)[3]);
void DRW_shgroup_uniform_mat4(DRWShadingGroup *shgroup, const char *name, const float (*value)[4]);
/* Only to be used when image load store is supported (GPU_shader_image_load_store_support()). */
void DRW_shgroup_uniform_image(DRWShadingGroup *shgroup, const char *name, const GPUTexture *tex);
void DRW_shgroup_uniform_image_ref(DRWShadingGroup *shgroup, const char *name, GPUTexture **tex);
/* Store value instead of referencing it. */
void DRW_shgroup_uniform_int_copy(DRWShadingGroup *shgroup, const char *name, const int value);
void DRW_shgroup_uniform_ivec2_copy(DRWShadingGroup *shgroup, const char *name, const int *value);

View File

@@ -278,6 +278,8 @@ typedef enum {
DRW_UNIFORM_FLOAT_COPY,
DRW_UNIFORM_TEXTURE,
DRW_UNIFORM_TEXTURE_REF,
DRW_UNIFORM_IMAGE,
DRW_UNIFORM_IMAGE_REF,
DRW_UNIFORM_BLOCK,
DRW_UNIFORM_BLOCK_REF,
DRW_UNIFORM_TFEEDBACK_TARGET,

View File

@@ -199,10 +199,12 @@ static void drw_shgroup_uniform_create_ex(DRWShadingGroup *shgroup,
case DRW_UNIFORM_BLOCK_REF:
uni->block_ref = (GPUUniformBuf **)value;
break;
case DRW_UNIFORM_IMAGE:
case DRW_UNIFORM_TEXTURE:
uni->texture = (GPUTexture *)value;
uni->sampler_state = sampler_state;
break;
case DRW_UNIFORM_IMAGE_REF:
case DRW_UNIFORM_TEXTURE_REF:
uni->texture_ref = (GPUTexture **)value;
uni->sampler_state = sampler_state;
@@ -261,6 +263,20 @@ void DRW_shgroup_uniform_texture_ref(DRWShadingGroup *shgroup, const char *name,
DRW_shgroup_uniform_texture_ref_ex(shgroup, name, tex, GPU_SAMPLER_MAX);
}
/**
 * Attach \a tex to \a shgroup as an image (load/store) uniform.
 *
 * The slot is resolved from the uniform \a name with
 * #GPU_shader_get_texture_binding and the uniform is recorded with the
 * #DRW_UNIFORM_IMAGE type, which is bound through #GPU_texture_image_bind
 * when uniforms are updated.
 *
 * \param tex: Texture to expose as an image, must not be NULL.
 */
void DRW_shgroup_uniform_image(DRWShadingGroup *shgroup, const char *name, const GPUTexture *tex)
{
  BLI_assert(tex != NULL);
  const int image_slot = GPU_shader_get_texture_binding(shgroup->shader, name);
  drw_shgroup_uniform_create_ex(shgroup, image_slot, DRW_UNIFORM_IMAGE, tex, 0, 0, 1);
}
/**
 * Attach a *reference* to a texture to \a shgroup as an image (load/store) uniform.
 *
 * Only the reference itself is asserted to be non-NULL here; the texture it
 * points to is read later, when the #DRW_UNIFORM_IMAGE_REF uniform is bound
 * (it is dereferenced at that point).
 *
 * \param tex: Address of the texture pointer, must not be NULL.
 */
void DRW_shgroup_uniform_image_ref(DRWShadingGroup *shgroup, const char *name, GPUTexture **tex)
{
  BLI_assert(tex != NULL);
  const int image_slot = GPU_shader_get_texture_binding(shgroup->shader, name);
  drw_shgroup_uniform_create_ex(shgroup, image_slot, DRW_UNIFORM_IMAGE_REF, tex, 0, 0, 1);
}
void DRW_shgroup_uniform_block(DRWShadingGroup *shgroup,
const char *name,
const GPUUniformBuf *ubo)

View File

@@ -596,6 +596,12 @@ static void draw_update_uniforms(DRWShadingGroup *shgroup,
case DRW_UNIFORM_TEXTURE_REF:
GPU_texture_bind_ex(*uni->texture_ref, uni->sampler_state, uni->location, false);
break;
case DRW_UNIFORM_IMAGE:
GPU_texture_image_bind(uni->texture, uni->location);
break;
case DRW_UNIFORM_IMAGE_REF:
GPU_texture_image_bind(*uni->texture_ref, uni->location);
break;
case DRW_UNIFORM_BLOCK:
GPU_uniformbuf_bind(uni->block, uni->location);
break;

View File

@@ -45,6 +45,8 @@ bool GPU_depth_blitting_workaround(void);
bool GPU_use_main_context_workaround(void);
bool GPU_crappy_amd_driver(void);
bool GPU_shader_image_load_store_support(void);
bool GPU_mem_stats_supported(void);
void GPU_mem_stats_get(int *totalmem, int *freemem);

View File

@@ -35,6 +35,14 @@ typedef enum eGPUWriteMask {
ENUM_OPERATORS(eGPUWriteMask)
/**
 * Bit-flags selecting which memory barrier(s) #GPU_memory_barrier should issue.
 * Each flag occupies its own bit so several can be requested at once.
 */
typedef enum eGPUBarrier {
/* No bit set. */
GPU_BARRIER_NONE = 0,
/* Translated to GL_SHADER_IMAGE_ACCESS_BARRIER_BIT by the OpenGL backend. */
GPU_BARRIER_SHADER_IMAGE_ACCESS = (1 << 0),
/* Translated to GL_TEXTURE_FETCH_BARRIER_BIT by the OpenGL backend. */
GPU_BARRIER_TEXTURE_FETCH = (1 << 1),
} eGPUBarrier;
/* NOTE(review): presumably provides the bitwise operators needed to combine
 * these flags from C++ - confirm against the macro definition. */
ENUM_OPERATORS(eGPUBarrier)
/**
* Defines the fixed pipeline blending equation.
* SRC is the output color from the shader.
@@ -152,6 +160,8 @@ eGPUStencilTest GPU_stencil_test_get(void);
void GPU_flush(void);
void GPU_finish(void);
void GPU_memory_barrier(eGPUBarrier barrier);
#ifdef __cplusplus
}
#endif

View File

@@ -243,6 +243,10 @@ void GPU_texture_bind_ex(GPUTexture *tex, eGPUSamplerState state, int unit, cons
void GPU_texture_unbind(GPUTexture *tex);
void GPU_texture_unbind_all(void);
void GPU_texture_image_bind(GPUTexture *tex, int unit);
void GPU_texture_image_unbind(GPUTexture *tex);
void GPU_texture_image_unbind_all(void);
void GPU_texture_copy(GPUTexture *dst, GPUTexture *src);
void GPU_texture_generate_mipmap(GPUTexture *tex);

View File

@@ -102,6 +102,11 @@ bool GPU_crappy_amd_driver(void)
return GCaps.broken_amd_driver;
}
/**
 * Report whether shader image load/store may be used.
 *
 * Simply returns the `shader_image_load_store_support` capability flag, which
 * the OpenGL backend fills from the availability of the
 * `ARB_shader_image_load_store` extension and clears again for drivers that
 * are known to be problematic.
 */
bool GPU_shader_image_load_store_support(void)
{
  const bool is_supported = GCaps.shader_image_load_store_support;
  return is_supported;
}
/** \} */
/* -------------------------------------------------------------------- */

View File

@@ -42,6 +42,7 @@ struct GPUCapabilities {
int max_textures_geom = 0;
int max_textures_frag = 0;
bool mem_stats_support = false;
bool shader_image_load_store_support = false;
/* OpenGL related workarounds. */
bool mip_render_workaround = false;
bool depth_blitting_workaround = false;
@@ -52,4 +53,4 @@ struct GPUCapabilities {
extern GPUCapabilities GCaps;
} // namespace blender::gpu
} // namespace blender::gpu

View File

@@ -63,6 +63,7 @@ class ShaderInterface {
/** Enabled bindpoints that needs to be fed with data. */
uint16_t enabled_attr_mask_ = 0;
uint16_t enabled_ubo_mask_ = 0;
uint8_t enabled_ima_mask_ = 0;
uint64_t enabled_tex_mask_ = 0;
/** Location of builtin uniforms. Fast access, no lookup needed. */
int32_t builtins_[GPU_NUM_UNIFORMS];

View File

@@ -30,7 +30,6 @@
#include "BKE_global.h"
#include "GPU_glew.h"
#include "GPU_state.h"
#include "gpu_context_private.hh"
@@ -308,6 +307,17 @@ void GPU_finish(void)
/** \} */
/* -------------------------------------------------------------------- */
/** \name Synchronisation Utils
* \{ */
/**
 * Issue the memory barrier(s) described by \a barrier.
 * Forwards the request to the state manager of the active context.
 *
 * \param barrier: Combination of #eGPUBarrier flags.
 */
void GPU_memory_barrier(eGPUBarrier barrier)
{
Context::get()->state_manager->issue_barrier(barrier);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Default OpenGL State
*

View File

@@ -163,10 +163,16 @@ class GPUStateManager {
virtual void apply_state(void) = 0;
virtual void issue_barrier(eGPUBarrier barrier_bits) = 0;
virtual void texture_bind(Texture *tex, eGPUSamplerState sampler, int unit) = 0;
virtual void texture_unbind(Texture *tex) = 0;
virtual void texture_unbind_all(void) = 0;
virtual void image_bind(Texture *tex, int unit) = 0;
virtual void image_unbind(Texture *tex) = 0;
virtual void image_unbind_all(void) = 0;
virtual void texture_unpack_row_length_set(uint len) = 0;
};

View File

@@ -418,6 +418,21 @@ void GPU_texture_unbind_all(void)
Context::get()->state_manager->texture_unbind_all();
}
/**
 * Bind \a tex as an image (load/store) on image unit \a unit.
 * Unwraps the handle and forwards to the active context's state manager.
 *
 * \note The OpenGL state manager asserts that image load/store is supported
 * and that \a unit is below 8.
 */
void GPU_texture_image_bind(GPUTexture *tex, int unit)
{
Context::get()->state_manager->image_bind(unwrap(tex), unit);
}
/**
 * Remove \a tex from the image units it is bound to.
 * Unwraps the handle and forwards to the active context's state manager.
 */
void GPU_texture_image_unbind(GPUTexture *tex)
{
Context::get()->state_manager->image_unbind(unwrap(tex));
}
/**
 * Clear every image unit binding.
 * Forwards to the active context's state manager.
 */
void GPU_texture_image_unbind_all(void)
{
Context::get()->state_manager->image_unbind_all();
}
void GPU_texture_generate_mipmap(GPUTexture *tex)
{
reinterpret_cast<Texture *>(tex)->generate_mipmap();

View File

@@ -210,6 +210,7 @@ static void detect_workarounds(void)
GLContext::debug_layer_workaround = true;
GLContext::unused_fb_slot_workaround = true;
/* Turn off extensions. */
GCaps.shader_image_load_store_support = false;
GLContext::base_instance_support = false;
GLContext::clear_texture_support = false;
GLContext::copy_image_support = false;
@@ -250,17 +251,20 @@ static void detect_workarounds(void)
(strstr(version, "4.5.13399") || strstr(version, "4.5.13417") ||
strstr(version, "4.5.13422"))) {
GLContext::unused_fb_slot_workaround = true;
GCaps.shader_image_load_store_support = false;
GCaps.broken_amd_driver = true;
}
/* We have issues with this specific renderer. (see T74024) */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OPENSOURCE) &&
strstr(renderer, "AMD VERDE")) {
GLContext::unused_fb_slot_workaround = true;
GCaps.shader_image_load_store_support = false;
GCaps.broken_amd_driver = true;
}
/* Fix slowdown on this particular driver. (see T77641) */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_UNIX, GPU_DRIVER_OPENSOURCE) &&
strstr(version, "Mesa 19.3.4")) {
GCaps.shader_image_load_store_support = false;
GCaps.broken_amd_driver = true;
}
/* There is an issue with the #glBlitFramebuffer on MacOS with radeon pro graphics.
@@ -349,10 +353,10 @@ static void detect_workarounds(void)
}
/** Internal capabilities. */
GLint GLContext::max_texture_3d_size;
GLint GLContext::max_cubemap_size;
GLint GLContext::max_ubo_size;
GLint GLContext::max_ubo_binds;
GLint GLContext::max_cubemap_size = 0;
GLint GLContext::max_texture_3d_size = 0;
GLint GLContext::max_ubo_binds = 0;
GLint GLContext::max_ubo_size = 0;
/** Extensions. */
bool GLContext::base_instance_support = false;
bool GLContext::clear_texture_support = false;
@@ -383,6 +387,7 @@ void GLBackend::capabilities_init(void)
glGetIntegerv(GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, &GCaps.max_textures_geom);
glGetIntegerv(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS, &GCaps.max_textures);
GCaps.mem_stats_support = GLEW_NVX_gpu_memory_info || GLEW_ATI_meminfo;
GCaps.shader_image_load_store_support = GLEW_ARB_shader_image_load_store;
/* GL specific capabilities. */
glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &GLContext::max_texture_3d_size);
glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &GLContext::max_cubemap_size);
@@ -413,4 +418,4 @@ void GLBackend::capabilities_init(void)
/** \} */
} // namespace blender::gpu
} // namespace blender::gpu

View File

@@ -56,8 +56,8 @@ class GLSharedOrphanLists {
class GLContext : public Context {
public:
/** Capabilities. */
static GLint max_texture_3d_size;
static GLint max_cubemap_size;
static GLint max_texture_3d_size;
static GLint max_ubo_size;
static GLint max_ubo_binds;
/** Extensions. */

View File

@@ -200,13 +200,16 @@ void check_gl_resources(const char *info)
* be big enough to feed the data range the shader awaits. */
uint16_t ubo_needed = interface->enabled_ubo_mask_;
ubo_needed &= ~ctx->bound_ubo_slots;
/* NOTE: This only check binding. To be valid, the bound texture needs to
* be the same format/target the shader expects. */
uint64_t tex_needed = interface->enabled_tex_mask_;
tex_needed &= ~GLContext::state_manager_active_get()->bound_texture_slots();
/* NOTE: This only check binding. To be valid, the bound image needs to
* be the same format/target the shader expects. */
uint8_t ima_needed = interface->enabled_ima_mask_;
ima_needed &= ~GLContext::state_manager_active_get()->bound_image_slots();
if (ubo_needed == 0 && tex_needed == 0) {
if (ubo_needed == 0 && tex_needed == 0 && ima_needed == 0) {
return;
}
@@ -223,6 +226,7 @@ void check_gl_resources(const char *info)
for (int i = 0; tex_needed != 0; i++, tex_needed >>= 1) {
if ((tex_needed & 1) != 0) {
/* FIXME: texture_get might return an image input instead. */
const ShaderInput *tex_input = interface->texture_get(i);
const char *tex_name = interface->input_name_get(tex_input);
const char *sh_name = ctx->shader->name_get();
@@ -231,6 +235,18 @@ void check_gl_resources(const char *info)
debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL);
}
}
for (int i = 0; ima_needed != 0; i++, ima_needed >>= 1) {
if ((ima_needed & 1) != 0) {
/* FIXME: texture_get might return a texture input instead. */
const ShaderInput *tex_input = interface->texture_get(i);
const char *tex_name = interface->input_name_get(tex_input);
const char *sh_name = ctx->shader->name_get();
char msg[256];
SNPRINTF(msg, "Missing Image bind at slot %d : %s > %s : %s", i, sh_name, tex_name, info);
debug_callback(0, GL_DEBUG_TYPE_ERROR, 0, GL_DEBUG_SEVERITY_HIGH, 0, msg, NULL);
}
}
}
void raise_gl_error(const char *info)

View File

@@ -100,6 +100,31 @@ static inline int sampler_binding(int32_t program,
return -1;
}
}
/**
 * Assign an image unit to the uniform at \a uniform_index if it is an image.
 *
 * The uniform type is queried from \a program. For `image1D`, `image2D` and
 * `image3D` uniforms the next consecutive unit (the current value of
 * \a image_len) is written to \a uniform_location and \a image_len is
 * incremented.
 *
 * \param image_len: In/out counter of image units assigned so far.
 * \return The assigned image unit, or -1 if the uniform is not one of the
 * handled image types.
 */
static inline int image_binding(int32_t program,
                                uint32_t uniform_index,
                                int32_t uniform_location,
                                int *image_len)
{
  GLint uniform_type;
  glGetActiveUniformsiv(program, 1, &uniform_index, GL_UNIFORM_TYPE, &uniform_type);

  const bool is_image = (uniform_type == GL_IMAGE_1D) || (uniform_type == GL_IMAGE_2D) ||
                        (uniform_type == GL_IMAGE_3D);
  if (!is_image) {
    return -1;
  }

  /* For now units are handed out consecutively. In the future, the unit should be set in
   * the shader using layout(binding = i) and its value queried instead. */
  const int unit = *image_len;
  glUniform1i(uniform_location, unit);
  *image_len = unit + 1;
  return unit;
}
/** \} */
/* -------------------------------------------------------------------- */
@@ -207,8 +232,8 @@ GLShaderInterface::GLShaderInterface(GLuint program)
enabled_ubo_mask_ |= (1 << input->binding);
}
/* Uniforms */
for (int i = 0, sampler = 0; i < active_uniform_len; i++) {
/* Uniforms & samplers & images */
for (int i = 0, sampler = 0, image = 0; i < active_uniform_len; i++) {
if (BLI_BITMAP_TEST(uniforms_from_blocks, i)) {
continue;
}
@@ -224,6 +249,12 @@ GLShaderInterface::GLShaderInterface(GLuint program)
name_buffer_offset += this->set_input_name(input, name, name_len);
enabled_tex_mask_ |= (input->binding != -1) ? (1lu << input->binding) : 0lu;
if (input->binding == -1) {
input->binding = image_binding(program, i, input->location, &image);
enabled_ima_mask_ |= (input->binding != -1) ? (1lu << input->binding) : 0lu;
}
}
/* Builtin Uniforms */
@@ -296,4 +327,4 @@ void GLShaderInterface::ref_remove(GLVaoCache *ref)
/** \} */
} // namespace blender::gpu
} // namespace blender::gpu

View File

@@ -76,6 +76,7 @@ void GLStateManager::apply_state(void)
this->set_state(this->state);
this->set_mutable_state(this->mutable_state);
this->texture_bind_apply();
this->image_bind_apply();
active_fb->apply_state();
};
@@ -538,4 +539,98 @@ uint64_t GLStateManager::bound_texture_slots(void)
/** \} */
/* -------------------------------------------------------------------- */
/** \name Image Binding (from image load store)
* \{ */
/**
 * Queue \a tex_ to be bound as an image (load/store) on image unit \a unit.
 *
 * Only the bookkeeping is updated here: the texture id and its GL internal
 * format are stored for the unit and the unit is flagged dirty. The GL bind
 * itself happens in #image_bind_apply.
 *
 * \param unit: Image unit, must be inside the range of the `images_` array.
 * Out of range units are asserted on and otherwise ignored.
 */
void GLStateManager::image_bind(Texture *tex_, int unit)
{
  /* Minimum support is 8 images in the fragment shader. No image for other stages. */
  const int unit_len = ARRAY_SIZE(images_);
  const bool unit_is_valid = (unit >= 0) && (unit < unit_len);
  BLI_assert(GPU_shader_image_load_store_support() && unit_is_valid);
  if (!unit_is_valid) {
    /* The assert may be disabled (e.g. release builds): never index the slot
     * arrays or shift the dirty mask with an out of range unit. */
    return;
  }

  GLTexture *tex = static_cast<GLTexture *>(tex_);
  if (G.debug & G_DEBUG_GPU) {
    tex->check_feedback_loop();
  }

  images_[unit] = tex->tex_id_;
  formats_[unit] = to_gl_internal_format(tex->format_);
  tex->is_bound_ = true;
  dirty_image_binds_ |= 1ULL << unit;
}
/**
 * Remove \a tex_ from every image unit it occupies.
 *
 * Does nothing when the texture is not flagged as bound. Otherwise each unit
 * holding the texture id is reset to 0 and flagged dirty, then the bound flag
 * of the texture is cleared (whether or not a matching unit was found).
 * The GL state itself is only updated by the next #image_bind_apply.
 */
void GLStateManager::image_unbind(Texture *tex_)
{
  GLTexture *gl_tex = static_cast<GLTexture *>(tex_);
  if (gl_tex->is_bound_) {
    const GLuint id_to_remove = gl_tex->tex_id_;
    for (int unit = 0; unit < ARRAY_SIZE(images_); unit++) {
      if (images_[unit] != id_to_remove) {
        continue;
      }
      images_[unit] = 0;
      dirty_image_binds_ |= 1ULL << unit;
    }
    gl_tex->is_bound_ = false;
  }
}
void GLStateManager::image_unbind_all(void)
{
for (int i = 0; i < ARRAY_SIZE(images_); i++) {
if (images_[i] != 0) {
images_[i] = 0;
dirty_image_binds_ |= 1ULL << i;
}
}
this->image_bind_apply();
}
/**
 * Flush the pending image unit changes recorded in `dirty_image_binds_` to GL.
 * Returns immediately when no unit is dirty.
 */
void GLStateManager::image_bind_apply(void)
{
if (dirty_image_binds_ == 0) {
return;
}
/* Take a copy of the dirty mask and clear it before issuing any GL call. */
uint32_t dirty_bind = dirty_image_binds_;
dirty_image_binds_ = 0;
/* NOTE(review): presumably `first` is the index of the lowest dirty unit and
 * `last` is one past the highest dirty unit - confirm against the bitscan helpers. */
int first = bitscan_forward_uint(dirty_bind);
int last = 32 - bitscan_reverse_uint(dirty_bind);
int count = last - first;
if (GLContext::multi_bind_support) {
/* Single call covering the whole `first..last` range, so units inside the
 * range that are not dirty are re-bound with their current value too. */
glBindImageTextures(first, count, images_ + first);
}
else {
/* Fallback: bind only the dirty units, one call each. GL_TRUE requests a
 * layered binding; the access is always GL_READ_WRITE and the format is the
 * one stored for the unit. */
for (int unit = first; unit < last; unit++) {
if ((dirty_bind >> unit) & 1UL) {
glBindImageTexture(unit, images_[unit], 0, GL_TRUE, 0, GL_READ_WRITE, formats_[unit]);
}
}
}
}
/**
 * Build a bit-mask of the image units that currently hold a texture.
 *
 * \return Mask where bit `i` is set when `images_[i]` is not 0.
 */
uint8_t GLStateManager::bound_image_slots(void)
{
  uint8_t slot_mask = 0;
  int slot = 0;
  for (const GLuint image_id : images_) {
    if (image_id != 0) {
      slot_mask |= 1ULL << slot;
    }
    slot++;
  }
  return slot_mask;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Memory barrier
* \{ */
/**
 * Issue a GL memory barrier for \a barrier_bits.
 * The flags are converted to their GL equivalent with #to_gl and passed to
 * `glMemoryBarrier`.
 */
void GLStateManager::issue_barrier(eGPUBarrier barrier_bits)
{
glMemoryBarrier(to_gl(barrier_bits));
}
/** \} */
} // namespace blender::gpu

View File

@@ -64,19 +64,30 @@ class GLStateManager : public GPUStateManager {
GLuint samplers_[64] = {0};
uint64_t dirty_texture_binds_ = 0;
GLuint images_[8] = {0};
GLenum formats_[8] = {0};
uint8_t dirty_image_binds_ = 0;
public:
GLStateManager();
void apply_state(void) override;
void issue_barrier(eGPUBarrier barrier_bits) override;
void texture_bind(Texture *tex, eGPUSamplerState sampler, int unit) override;
void texture_bind_temp(GLTexture *tex);
void texture_unbind(Texture *tex) override;
void texture_unbind_all(void) override;
void image_bind(Texture *tex, int unit) override;
void image_unbind(Texture *tex) override;
void image_unbind_all(void) override;
void texture_unpack_row_length_set(uint len) override;
uint64_t bound_texture_slots(void);
uint8_t bound_image_slots(void);
private:
static void set_write_mask(const eGPUWriteMask value);
@@ -95,9 +106,22 @@ class GLStateManager : public GPUStateManager {
void set_mutable_state(const GPUStateMutable &state);
void texture_bind_apply(void);
void image_bind_apply(void);
MEM_CXX_CLASS_ALLOC_FUNCS("GLStateManager")
};
/**
 * Convert #eGPUBarrier flags to the matching `glMemoryBarrier` bit-field.
 *
 * - #GPU_BARRIER_SHADER_IMAGE_ACCESS becomes `GL_SHADER_IMAGE_ACCESS_BARRIER_BIT`.
 * - #GPU_BARRIER_TEXTURE_FETCH becomes `GL_TEXTURE_FETCH_BARRIER_BIT`.
 *
 * \return The combined GL bits, 0 when neither flag is set.
 */
static inline GLbitfield to_gl(eGPUBarrier barrier_bits)
{
  const GLbitfield image_access_bit = (barrier_bits & GPU_BARRIER_SHADER_IMAGE_ACCESS) ?
                                          GL_SHADER_IMAGE_ACCESS_BARRIER_BIT :
                                          0;
  const GLbitfield texture_fetch_bit = (barrier_bits & GPU_BARRIER_TEXTURE_FETCH) ?
                                           GL_TEXTURE_FETCH_BARRIER_BIT :
                                           0;
  return image_access_bit | texture_fetch_bit;
}
} // namespace gpu
} // namespace blender