GPU: Add Shader specialization constant API
Adds an API that allows the use of specialization constants in shaders. Specialization constants are runtime-set constants that are compiled into a shader pipeline state object (PSO), improving runtime performance by letting the shader compiler constant-fold and reduce shader complexity. The API allows specialization constant values to be specified at runtime, with a default value used when no value has been set. Each GPU backend is then responsible for caching PSO permutations against the current specialization configuration.

This patch adds support for specialization constants in the Metal backend and provides a generalised high-level solution which can be adopted by other graphics APIs supporting this feature.

Authored by Apple: Michael Parkin-White
Authored by Blender: Clément Foucault (files in gpu/test folder)

Pull Request: https://projects.blender.org/blender/blender/pulls/115193
Committed by: Clément Foucault
Parent: eb99895c97
Commit: 335d3a1b75
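A minimal end-to-end sketch of the flow this API introduces (the create-info, constant and source names below are made up for illustration): declare the constant with a default in the shader create info, assign a value at runtime before binding, and let the backend compile or reuse the matching PSO permutation.

    /* Create info: declare the constant and the default compiled in when it is never specialized. */
    GPU_SHADER_CREATE_INFO(my_compute) /* Hypothetical info name. */
        .local_group_size(8, 8)
        .specialization_constant(Type::BOOL, "use_fast_path", false) /* Hypothetical constant. */
        .compute_source("my_compute_comp.glsl") /* Hypothetical source file. */
        .do_static_compilation(true);

    /* Runtime: specify every constant before binding, then bind and dispatch as usual. */
    GPUShader *sh = GPU_shader_create_from_info_name("my_compute");
    GPU_shader_constant_bool(sh, "use_fast_path", true); /* Backend compiles or reuses a cached PSO permutation on bind. */
    GPU_shader_bind(sh);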
@@ -89,6 +89,11 @@ class Film {
void sync();
void end_sync();

const FilmData &get_data()
{
return data_;
}

/** Accumulate the newly rendered sample contained in #RenderBuffers and blit to display. */
void accumulate(View &view, GPUTexture *combined_final_tx);

@@ -32,10 +32,16 @@ void HiZBuffer::sync()
data_.uv_scale = float2(render_extent) / float2(hiz_extent);

/* TODO(@fclem): There might be occasions where we might not want to
* copy mip 0 for performance reasons if there is no need for it. */
bool update_mip_0 = true;

{
PassSimple &pass = hiz_update_ps_;
GPUShader *sh = inst_.shaders.static_shader_get(HIZ_UPDATE);
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(HIZ_UPDATE));
pass.specialize_constant(sh, "update_mip_0", update_mip_0);
pass.shader_set(sh);
pass.bind_ssbo("finished_tile_counter", atomic_tile_counter_);
/* TODO(fclem): Should be a parameter to avoid confusion. */
pass.bind_texture("depth_tx", &src_tx_, with_filter);

@@ -46,16 +52,15 @@ void HiZBuffer::sync()
pass.bind_image("out_mip_4", &hiz_mip_ref_[4]);
pass.bind_image("out_mip_5", &hiz_mip_ref_[5]);
pass.bind_image("out_mip_6", &hiz_mip_ref_[6]);
/* TODO(@fclem): There might be occasions where we might not want to
* copy mip 0 for performance reasons if there is no need for it. */
pass.push_constant("update_mip_0", true);
pass.dispatch(int3(dispatch_size, 1));
pass.barrier(GPU_BARRIER_TEXTURE_FETCH);
}
{
PassSimple &pass = hiz_update_layer_ps_;
GPUShader *sh = inst_.shaders.static_shader_get(HIZ_UPDATE_LAYER);
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(HIZ_UPDATE_LAYER));
pass.specialize_constant(sh, "update_mip_0", update_mip_0);
pass.shader_set(sh);
pass.bind_ssbo("finished_tile_counter", atomic_tile_counter_);
/* TODO(fclem): Should be a parameter to avoid confusion. */
pass.bind_texture("depth_layered_tx", &src_tx_, with_filter);

@@ -66,9 +71,6 @@ void HiZBuffer::sync()
pass.bind_image("out_mip_4", &hiz_mip_ref_[4]);
pass.bind_image("out_mip_5", &hiz_mip_ref_[5]);
pass.bind_image("out_mip_6", &hiz_mip_ref_[6]);
/* TODO(@fclem): There might be occasions where we might not want to
* copy mip 0 for performance reasons if there is no need for it. */
pass.push_constant("update_mip_0", true);
pass.push_constant("layer_id", &layer_id_);
pass.dispatch(int3(dispatch_size, 1));
pass.barrier(GPU_BARRIER_TEXTURE_FETCH);
@@ -503,6 +503,8 @@ void DeferredLayer::end_sync()
eClosureBits evaluated_closures = CLOSURE_DIFFUSE | CLOSURE_TRANSLUCENT | CLOSURE_REFLECTION |
CLOSURE_REFRACTION;
if (closure_bits_ & evaluated_closures) {
RenderBuffersInfoData &rbuf_data = inst_.render_buffers.data;

/* Add the tile classification step at the end of the GBuffer pass. */
{
/* Fill tile mask texture with the collected closure present in a tile. */

@@ -568,7 +570,15 @@ void DeferredLayer::end_sync()
/* Submit the more costly ones first to avoid long tail in occupancy.
* See page 78 of "SIGGRAPH 2023: Unreal Engine Substrate" by Hillaire & de Rousiers. */
for (int i = ARRAY_SIZE(closure_bufs_) - 1; i >= 0; i--) {
sub.shader_set(inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i)));
GPUShader *sh = inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
/* TODO(fclem): Could specialize directly with the pass index but this would break it for
* OpenGL and Vulkan implementation which aren't fully supporting the specialize
* constant. */
sub.specialize_constant(sh, "render_pass_shadow_enabled", rbuf_data.shadow_id != -1);
const ShadowSceneData &shadow_scene = inst_.shadows.get_data();
sub.specialize_constant(sh, "shadow_ray_count", &shadow_scene.ray_count);
sub.specialize_constant(sh, "shadow_ray_step_count", &shadow_scene.step_count);
sub.shader_set(sh);
sub.bind_image("direct_radiance_1_img", &direct_radiance_txs_[0]);
sub.bind_image("direct_radiance_2_img", &direct_radiance_txs_[1]);
sub.bind_image("direct_radiance_3_img", &direct_radiance_txs_[2]);

@@ -596,9 +606,17 @@ void DeferredLayer::end_sync()
{
PassSimple &pass = combine_ps_;
pass.init();
GPUShader *sh = inst_.shaders.static_shader_get(DEFERRED_COMBINE);
/* TODO(fclem): Could specialize directly with the pass index but this would break it for
* OpenGL and Vulkan implementation which aren't fully supporting the specialize
* constant. */
pass.specialize_constant(
sh, "render_pass_diffuse_light_enabled", rbuf_data.diffuse_light_id != -1);
pass.specialize_constant(
sh, "render_pass_specular_light_enabled", rbuf_data.specular_light_id != -1);
pass.shader_set(sh);
/* Use depth test to reject background pixels. */
pass.state_set(DRW_STATE_WRITE_COLOR | DRW_STATE_DEPTH_GREATER | DRW_STATE_BLEND_ADD_FULL);
pass.shader_set(inst_.shaders.static_shader_get(DEFERRED_COMBINE));
pass.bind_image("direct_radiance_1_img", &direct_radiance_txs_[0]);
pass.bind_image("direct_radiance_2_img", &direct_radiance_txs_[1]);
pass.bind_image("direct_radiance_3_img", &direct_radiance_txs_[2]);
@@ -150,8 +150,11 @@ void RayTraceModule::sync()
/* Denoise. */
for (auto type : IndexRange(3)) {
PassSimple &pass = PASS_VARIATION(denoise_spatial_, type, _ps_);
GPUShader *sh = inst_.shaders.static_shader_get(SHADER_VARIATION(RAY_DENOISE_SPATIAL_, type));
pass.init();
pass.shader_set(inst_.shaders.static_shader_get(SHADER_VARIATION(RAY_DENOISE_SPATIAL_, type)));
pass.specialize_constant(sh, "raytrace_resolution_scale", &data_.resolution_scale);
pass.specialize_constant(sh, "skip_denoise", reinterpret_cast<bool *>(&data_.skip_denoise));
pass.shader_set(sh);
pass.bind_ssbo("tiles_coord_buf", &raytrace_denoise_tiles_buf_);
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
pass.bind_texture("depth_tx", &depth_tx);

@@ -353,6 +353,11 @@ class ShadowModule {
pass.bind_texture(SHADOW_TILEMAPS_TEX_SLOT, &tilemap_pool.tilemap_tx);
}

const ShadowSceneData &get_data()
{
return data_;
}

private:
void remove_unused();
void debug_page_map_call(DRWPass *pass);
@@ -77,11 +77,13 @@ void main()
}
}

#if 1 /* TODO(fclem): Only if needed. */
/* Light passes. */
output_renderpass_color(uniform_buf.render_pass.diffuse_light_id, vec4(out_diffuse, 1.0));
output_renderpass_color(uniform_buf.render_pass.specular_light_id, vec4(out_specular, 1.0));
#endif
if (render_pass_diffuse_light_enabled) {
output_renderpass_color(uniform_buf.render_pass.diffuse_light_id, vec4(out_diffuse, 1.0));
}
if (render_pass_specular_light_enabled) {
output_renderpass_color(uniform_buf.render_pass.specular_light_id, vec4(out_specular, 1.0));
}

if (any(isnan(out_combined))) {
out_combined = vec4(1.0, 0.0, 1.0, 0.0);

@@ -98,17 +98,17 @@ void main()
}
#endif

#if 1 /* TODO(fclem): Limit to when shadow pass is needed. */
vec3 radiance_shadowed = vec3(0);
vec3 radiance_unshadowed = vec3(0);
for (int i = 0; i < LIGHT_CLOSURE_EVAL_COUNT && i < gbuf.closure_count; i++) {
radiance_shadowed += stack.cl[i].light_shadowed;
radiance_unshadowed += stack.cl[i].light_unshadowed;
if (render_pass_shadow_enabled) {
vec3 radiance_shadowed = vec3(0);
vec3 radiance_unshadowed = vec3(0);
for (int i = 0; i < LIGHT_CLOSURE_EVAL_COUNT && i < gbuf.closure_count; i++) {
radiance_shadowed += stack.cl[i].light_shadowed;
radiance_unshadowed += stack.cl[i].light_unshadowed;
}
/* TODO(fclem): Change shadow pass to be colored. */
vec3 shadows = radiance_shadowed * safe_rcp(radiance_unshadowed);
output_renderpass_value(uniform_buf.render_pass.shadow_id, average(shadows));
}
/* TODO(fclem): Change shadow pass to be colored. */
vec3 shadows = radiance_shadowed * safe_rcp(radiance_unshadowed);
output_renderpass_value(uniform_buf.render_pass.shadow_id, average(shadows));
#endif

for (int i = 0; i < LIGHT_CLOSURE_EVAL_COUNT && i < gbuf.closure_count; i++) {
/* TODO(fclem): Layered texture. */
@@ -58,8 +58,15 @@ void light_shadow_single(uint l_idx,
if (attenuation < LIGHT_ATTENUATION_THRESHOLD) {
return;
}

/* TODO(fclem): Enable for OpenGL and Vulkan once they fully support specialization constants. */
#if defined(SPECIALIZED_SHADOW_PARAMS) && defined(GPU_METAL)
int ray_count = shadow_ray_count;
int ray_step_count = shadow_ray_step_count;
#else
int ray_count = uniform_buf.shadow.ray_count;
int ray_step_count = uniform_buf.shadow.step_count;
#endif

ShadowEvalResult result = shadow_eval(
light, is_directional, P, Ng, thickness, ray_count, ray_step_count);

@@ -131,8 +138,15 @@ void light_eval_single(uint l_idx,
inout uint shift)
{
LightData light = light_buf[l_idx];

/* TODO(fclem): Enable for OpenGL and Vulkan once they fully support specialization constants. */
#if defined(SPECIALIZED_SHADOW_PARAMS) && defined(GPU_METAL)
int ray_count = shadow_ray_count;
int ray_step_count = shadow_ray_step_count;
#else
int ray_count = uniform_buf.shadow.ray_count;
int ray_step_count = uniform_buf.shadow.step_count;
#endif

bool use_subsurface = thickness > 0.0;
LightVector lv = light_vector_get(light, is_directional, P);
@@ -42,10 +42,21 @@ void main()
const uint tile_size = RAYTRACE_GROUP_SIZE;
uvec2 tile_coord = unpackUvec2x16(tiles_coord_buf[gl_WorkGroupID.x]);

ivec2 texel_fullres = ivec2(gl_LocalInvocationID.xy + tile_coord * tile_size);
ivec2 texel = (texel_fullres) / uniform_buf.raytrace.resolution_scale;
#ifdef GPU_METAL
int rt_resolution_scale = raytrace_resolution_scale;
#else /* TODO(fclem): Support specialization on OpenGL and Vulkan. */
int rt_resolution_scale = uniform_buf.raytrace.resolution_scale;
#endif

if (uniform_buf.raytrace.skip_denoise) {
ivec2 texel_fullres = ivec2(gl_LocalInvocationID.xy + tile_coord * tile_size);
ivec2 texel = (texel_fullres) / rt_resolution_scale;

#ifdef GPU_METAL
bool do_skip_denoise = skip_denoise;
#else /* TODO(fclem): Support specialization on OpenGL and Vulkan. */
bool do_skip_denoise = uniform_buf.raytrace.skip_denoise;
#endif
if (do_skip_denoise) {
imageStore(out_radiance_img, texel_fullres, imageLoad(ray_radiance_img, texel));
return;
}

@@ -122,7 +133,7 @@ void main()
/* NOTE: filter_size should never be greater than twice RAYTRACE_GROUP_SIZE. Otherwise, the
* reconstruction can becomes ill defined since we don't know if further tiles are valid. */
filter_size = 12.0 * sqrt(filter_size_factor);
if (uniform_buf.raytrace.resolution_scale > 1) {
if (rt_resolution_scale > 1) {
/* Filter at least 1 trace pixel to fight the undersampling. */
filter_size = max(filter_size, 3.0);
sample_count = max(sample_count, 5u);

@@ -44,7 +44,8 @@ void thickness_from_shadow_single(
float thickness_from_shadow(vec3 P, vec3 Ng, float vPz)
{
/* Bias surface inward to avoid shadow map aliasing. */
P += -Ng * uniform_buf.shadow.normal_bias;
float normal_offset = uniform_buf.shadow.normal_bias;
P += -Ng * normal_offset;

vec2 thickness = vec2(0.0);
@@ -63,6 +63,10 @@ GPU_SHADER_CREATE_INFO(eevee_deferred_light)
.image_out(2, DEFERRED_RADIANCE_FORMAT, "direct_radiance_1_img")
.image_out(3, DEFERRED_RADIANCE_FORMAT, "direct_radiance_2_img")
.image_out(4, DEFERRED_RADIANCE_FORMAT, "direct_radiance_3_img")
.specialization_constant(Type::BOOL, "render_pass_shadow_enabled", true)
.define("SPECIALIZED_SHADOW_PARAMS")
.specialization_constant(Type::INT, "shadow_ray_count", 1)
.specialization_constant(Type::INT, "shadow_ray_step_count", 6)
.additional_info("eevee_shared",
"eevee_gbuffer_data",
"eevee_utility_texture",

@@ -107,6 +111,10 @@ GPU_SHADER_CREATE_INFO(eevee_deferred_combine)
"eevee_render_pass_out",
"draw_fullscreen")
.fragment_source("eevee_deferred_combine_frag.glsl")
/* NOTE: Both light IDs have a valid specialized assignment of '-1' so only when default is
* present will we instead dynamically look-up ID from the uniform buffer. */
.specialization_constant(Type::BOOL, "render_pass_diffuse_light_enabled", true)
.specialization_constant(Type::BOOL, "render_pass_specular_light_enabled", true)
.do_static_compilation(true);

GPU_SHADER_CREATE_INFO(eevee_deferred_capture_eval)

@@ -19,7 +19,7 @@ GPU_SHADER_CREATE_INFO(eevee_hiz_update_base)
.image(4, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_4")
.image(5, GPU_R32F, Qualifier::READ_WRITE, ImageType::FLOAT_2D, "out_mip_5")
.image(6, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_mip_6")
.push_constant(Type::BOOL, "update_mip_0")
.specialization_constant(Type::BOOL, "update_mip_0", false)
.compute_source("eevee_hiz_update_comp.glsl");

GPU_SHADER_CREATE_INFO(eevee_hiz_update)

@@ -142,6 +142,8 @@ GPU_SHADER_CREATE_INFO(eevee_ray_denoise_spatial)
.image(5, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "out_hit_depth_img")
.image(6, RAYTRACE_TILEMASK_FORMAT, Qualifier::READ, ImageType::UINT_2D_ARRAY, "tile_mask_img")
.storage_buf(4, Qualifier::READ, "uint", "tiles_coord_buf[]")
.specialization_constant(Type::INT, "raytrace_resolution_scale", 2)
.specialization_constant(Type::BOOL, "skip_denoise", false)
.compute_source("eevee_ray_denoise_spatial_comp.glsl");

EEVEE_RAYTRACE_CLOSURE_VARIATION(eevee_ray_denoise_spatial)
@@ -111,6 +111,39 @@ void PushConstant::execute(RecordingState &state) const
}
}

void SpecializeConstant::execute() const
{
/* All specialization constants should exist as they are not optimized out like uniforms. */
BLI_assert(location != -1);

switch (type) {
case SpecializeConstant::Type::IntValue:
GPU_shader_constant_int_ex(shader, location, int_value);
break;
case SpecializeConstant::Type::IntReference:
GPU_shader_constant_int_ex(shader, location, *int_ref);
break;
case SpecializeConstant::Type::UintValue:
GPU_shader_constant_uint_ex(shader, location, uint_value);
break;
case SpecializeConstant::Type::UintReference:
GPU_shader_constant_uint_ex(shader, location, *uint_ref);
break;
case SpecializeConstant::Type::FloatValue:
GPU_shader_constant_float_ex(shader, location, float_value);
break;
case SpecializeConstant::Type::FloatReference:
GPU_shader_constant_float_ex(shader, location, *float_ref);
break;
case SpecializeConstant::Type::BoolValue:
GPU_shader_constant_bool_ex(shader, location, bool_value);
break;
case SpecializeConstant::Type::BoolReference:
GPU_shader_constant_bool_ex(shader, location, *bool_ref);
break;
}
}

void Draw::execute(RecordingState &state) const
{
state.front_facing_set(handle.has_inverted_handedness());

@@ -424,6 +457,39 @@ std::string PushConstant::serialize() const
return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")";
}

std::string SpecializeConstant::serialize() const
{
std::stringstream ss;
switch (type) {
case Type::IntValue:
ss << int_value;
break;
case Type::UintValue:
ss << uint_value;
break;
case Type::FloatValue:
ss << float_value;
break;
case Type::BoolValue:
ss << bool_value;
break;
case Type::IntReference:
ss << *int_ref;
break;
case Type::UintReference:
ss << *uint_ref;
break;
case Type::FloatReference:
ss << *float_ref;
break;
case Type::BoolReference:
ss << *bool_ref;
break;
}
return std::string(".specialize_constant(") + std::to_string(location) + ", data=" + ss.str() +
")";
}

std::string Draw::serialize() const
{
std::string inst_len = (instance_len == uint(-1)) ? "from_batch" : std::to_string(instance_len);
@@ -98,6 +98,7 @@ enum class Type : uint8_t {
DrawIndirect,
FramebufferBind,
PushConstant,
SpecializeConstant,
ResourceBind,
ShaderBind,
SubPassTransition,

@@ -296,6 +297,53 @@ struct PushConstant {
std::string serialize() const;
};

struct SpecializeConstant {
/* Shader to set the constant in. */
GPUShader *shader;
/* Value of the constant or a reference to it. */
union {
int int_value;
int uint_value;
float float_value;
bool bool_value;
const int *int_ref;
const int *uint_ref;
const float *float_ref;
const bool *bool_ref;
};

int location;

enum class Type : uint8_t {
IntValue = 0,
UintValue,
FloatValue,
BoolValue,
IntReference,
UintReference,
FloatReference,
BoolReference,
} type;

SpecializeConstant() = default;

SpecializeConstant(GPUShader *sh, int loc, const float &val)
: shader(sh), float_value(val), location(loc), type(Type::FloatValue){};
SpecializeConstant(GPUShader *sh, int loc, const int &val)
: shader(sh), int_value(val), location(loc), type(Type::IntValue){};
SpecializeConstant(GPUShader *sh, int loc, const bool &val)
: shader(sh), bool_value(val), location(loc), type(Type::BoolValue){};
SpecializeConstant(GPUShader *sh, int loc, const float *val)
: shader(sh), float_ref(val), location(loc), type(Type::FloatReference){};
SpecializeConstant(GPUShader *sh, int loc, const int *val)
: shader(sh), int_ref(val), location(loc), type(Type::IntReference){};
SpecializeConstant(GPUShader *sh, int loc, const bool *val)
: shader(sh), bool_ref(val), location(loc), type(Type::BoolReference){};

void execute() const;
std::string serialize() const;
};

struct Draw {
GPUBatch *batch;
uint instance_len;

@@ -403,6 +451,7 @@ union Undetermined {
FramebufferBind framebuffer_bind;
SubPassTransition subpass_transition;
PushConstant push_constant;
SpecializeConstant specialize_constant;
Draw draw;
DrawMulti draw_multi;
DrawIndirect draw_indirect;
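The constructor overloads in the `SpecializeConstant` command above are what let `PassBase::specialize_constant()` (next hunk) pick a variant from the argument type: passing a value records a `*Value` command, while passing a pointer records a `*Reference` command whose pointee is only read in `execute()`, i.e. at submission time. A small illustration (the namespace and the location `0` are assumptions; the shader must declare a constant at that location):

    using blender::draw::command::SpecializeConstant;

    void record_variants(GPUShader *shader, int &sample_count)
    {
      /* Copied now: later changes to `sample_count` are not seen by this command. */
      SpecializeConstant by_value(shader, 0, sample_count); /* -> Type::IntValue */
      /* Dereferenced in execute(): `sample_count` must stay alive until submission. */
      SpecializeConstant by_ref(shader, 0, &sample_count); /* -> Type::IntReference */
      by_value.execute();
      by_ref.execute();
    }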
@@ -362,6 +362,27 @@ class PassBase {
void push_constant(const char *name, const int4 *data, int array_len = 1);
void push_constant(const char *name, const float4x4 *data);

/**
* Update a shader specialization constant.
*
* IMPORTANT: Non-specialized constants can have undefined values.
* Specialize every constant before binding a shader.
*
* Reference versions are to be used when the resource might change between the time it is
* referenced and the time it is dereferenced for drawing.
*
* IMPORTANT: Will keep a reference to the data and dereference it upon drawing. Make sure data
* still alive until pass submission.
*/
void specialize_constant(GPUShader *shader, const char *name, const float &data);
void specialize_constant(GPUShader *shader, const char *name, const int &data);
void specialize_constant(GPUShader *shader, const char *name, const uint &data);
void specialize_constant(GPUShader *shader, const char *name, const bool &data);
void specialize_constant(GPUShader *shader, const char *name, const float *data);
void specialize_constant(GPUShader *shader, const char *name, const int *data);
void specialize_constant(GPUShader *shader, const char *name, const uint *data);
void specialize_constant(GPUShader *shader, const char *name, const bool *data);

/**
* Turn the pass into a string for inspection.
*/
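A short sketch of how the value and reference overloads above differ in practice (the module, shader enum, and constant names are illustrative, mirroring the EEVEE usage in this commit): the value form copies the data at record time, while the reference form stores the pointer and dereferences it when the pass is submitted, so the pointee must outlive the pass.

    void MyModule::sync() /* Hypothetical module. */
    {
      GPUShader *sh = inst_.shaders.static_shader_get(MY_SHADER); /* Hypothetical shader enum. */
      pass_.init();
      /* Value: copied now; later changes to `use_split` are not seen. */
      const bool use_split = settings_.split_enabled;
      pass_.specialize_constant(sh, "use_split", use_split);
      /* Reference: read at submission, so `data_` must stay alive until then. */
      pass_.specialize_constant(sh, "sample_count", &data_.sample_count);
      pass_.shader_set(sh);
    }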
@@ -574,6 +595,9 @@ template<class T> void PassBase<T>::submit(command::RecordingState &state) const
case command::Type::PushConstant:
commands_[header.index].push_constant.execute(state);
break;
case command::Type::SpecializeConstant:
commands_[header.index].specialize_constant.execute();
break;
case command::Type::Draw:
commands_[header.index].draw.execute(state);
break;
@@ -1229,6 +1253,84 @@ template<class T> inline void PassBase<T>::push_constant(const char *name, const

/** \} */

/* -------------------------------------------------------------------- */
/** \name Resource bind Implementation
* \{ */

template<class T>
inline void PassBase<T>::specialize_constant(GPUShader *shader,
const char *constant_name,
const int &constant_value)
{
create_command(Type::SpecializeConstant).specialize_constant = {
shader, GPU_shader_get_constant(shader, constant_name), constant_value};
}

template<class T>
inline void PassBase<T>::specialize_constant(GPUShader *shader,
const char *constant_name,
const uint &constant_value)
{
create_command(Type::SpecializeConstant).specialize_constant = {
shader, GPU_shader_get_constant(shader, constant_name), constant_value};
}

template<class T>
inline void PassBase<T>::specialize_constant(GPUShader *shader,
const char *constant_name,
const float &constant_value)
{
create_command(Type::SpecializeConstant).specialize_constant = {
shader, GPU_shader_get_constant(shader, constant_name), constant_value};
}

template<class T>
inline void PassBase<T>::specialize_constant(GPUShader *shader,
const char *constant_name,
const bool &constant_value)
{
create_command(Type::SpecializeConstant).specialize_constant = {
shader, GPU_shader_get_constant(shader, constant_name), constant_value};
}

template<class T>
inline void PassBase<T>::specialize_constant(GPUShader *shader,
const char *constant_name,
const int *constant_value)
{
create_command(Type::SpecializeConstant).specialize_constant = {
shader, GPU_shader_get_constant(shader, constant_name), constant_value};
}

template<class T>
inline void PassBase<T>::specialize_constant(GPUShader *shader,
const char *constant_name,
const uint *constant_value)
{
create_command(Type::SpecializeConstant).specialize_constant = {
shader, GPU_shader_get_constant(shader, constant_name), constant_value};
}

template<class T>
inline void PassBase<T>::specialize_constant(GPUShader *shader,
const char *constant_name,
const float *constant_value)
{
create_command(Type::SpecializeConstant).specialize_constant = {
shader, GPU_shader_get_constant(shader, constant_name), constant_value};
}

template<class T>
inline void PassBase<T>::specialize_constant(GPUShader *shader,
const char *constant_name,
const bool *constant_value)
{
create_command(Type::SpecializeConstant).specialize_constant = {
shader, GPU_shader_get_constant(shader, constant_name), constant_value};
}

/** \} */

} // namespace detail

} // namespace blender::draw
@@ -603,6 +603,7 @@ set(GLSL_SRC_TEST
tests/shaders/gpu_compute_ssbo_test.glsl
tests/shaders/gpu_compute_vbo_test.glsl
tests/shaders/gpu_compute_dummy_test.glsl
tests/shaders/gpu_specialization_test.glsl
tests/shaders/gpu_framebuffer_layer_viewport_test.glsl
tests/shaders/gpu_framebuffer_subpass_input_test.glsl
tests/shaders/gpu_push_constants_test.glsl

@@ -893,6 +894,7 @@ if(WITH_GTESTS)
tests/index_buffer_test.cc
tests/push_constants_test.cc
tests/shader_test.cc
tests/specialization_constants_test.cc
tests/state_blend_test.cc
tests/storage_buffer_test.cc
tests/texture_test.cc
@@ -125,6 +125,11 @@ int GPU_shader_get_sampler_binding(GPUShader *shader, const char *name);
*/
int GPU_shader_get_uniform(GPUShader *shader, const char *name);

/**
* Returns specialization constant location.
*/
int GPU_shader_get_constant(GPUShader *shader, const char *name);

/**
* Sets a generic push constant (a.k.a. uniform).
* \a length and \a array_size should match the create info push_constant declaration.

@@ -173,6 +178,26 @@ bool GPU_shader_get_attribute_info(const GPUShader *shader,

/** \} */

/* -------------------------------------------------------------------- */
/** \name Specialization API.
*
* Used to allow specialization constants.
* IMPORTANT: All constants must be specified before binding a shader that needs specialization.
* Otherwise, it will produce undefined behavior.
* \{ */

void GPU_shader_constant_int_ex(GPUShader *sh, int location, int value);
void GPU_shader_constant_uint_ex(GPUShader *sh, int location, unsigned int value);
void GPU_shader_constant_float_ex(GPUShader *sh, int location, float value);
void GPU_shader_constant_bool_ex(GPUShader *sh, int location, bool value);

void GPU_shader_constant_int(GPUShader *sh, const char *name, int value);
void GPU_shader_constant_uint(GPUShader *sh, const char *name, unsigned int value);
void GPU_shader_constant_float(GPUShader *sh, const char *name, float value);
void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value);

/** \} */

/* -------------------------------------------------------------------- */
/** \name Legacy API
*
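The name-based setters resolve the location on every call; the `_ex` variants take a location resolved once with `GPU_shader_get_constant()`, which returns -1 when the create info does not declare the constant. A hedged sketch (the constant name is made up):

    /* One-off assignment by name. */
    GPU_shader_constant_int(shader, "quality_level", 2);

    /* Repeated assignment: resolve the location once and reuse it. */
    const int loc = GPU_shader_get_constant(shader, "quality_level");
    if (loc != -1) {
      GPU_shader_constant_int_ex(shader, loc, 2);
    }

    /* All constants must be specified before binding. */
    GPU_shader_bind(shader);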
@@ -298,6 +298,8 @@ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)

Shader *shader = GPUBackend::get()->shader_alloc(info.name_.c_str());

shader->specialization_constants_init(info);

std::string defines = shader->defines_declare(info);
std::string resources = shader->resources_declare(info);

@@ -544,6 +546,59 @@ void GPU_shader_transform_feedback_disable(GPUShader *shader)

/** \} */

/* -------------------------------------------------------------------- */
/** \name Assign specialization constants.
* \{ */

void Shader::specialization_constants_init(const shader::ShaderCreateInfo &info)
{
using namespace shader;
for (const ShaderCreateInfo::SpecializationConstant &sc : info.specialization_constants_) {
constants.types.append(sc.type);
constants.values.append(sc.default_value);
}
}

void GPU_shader_constant_int_ex(GPUShader *sh, int location, int value)
{
BLI_assert(unwrap(sh)->constants.types[location] == gpu::shader::Type::INT);
unwrap(sh)->constants.values[location].i = value;
}
void GPU_shader_constant_uint_ex(GPUShader *sh, int location, uint value)
{
BLI_assert(unwrap(sh)->constants.types[location] == gpu::shader::Type::UINT);
unwrap(sh)->constants.values[location].u = value;
}
void GPU_shader_constant_float_ex(GPUShader *sh, int location, float value)
{
BLI_assert(unwrap(sh)->constants.types[location] == gpu::shader::Type::FLOAT);
unwrap(sh)->constants.values[location].f = value;
}
void GPU_shader_constant_bool_ex(GPUShader *sh, int location, bool value)
{
BLI_assert(unwrap(sh)->constants.types[location] == gpu::shader::Type::BOOL);
unwrap(sh)->constants.values[location].u = value;
}

void GPU_shader_constant_int(GPUShader *sh, const char *name, int value)
{
GPU_shader_constant_int_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
}
void GPU_shader_constant_uint(GPUShader *sh, const char *name, uint value)
{
GPU_shader_constant_uint_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
}
void GPU_shader_constant_float(GPUShader *sh, const char *name, float value)
{
GPU_shader_constant_float_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
}
void GPU_shader_constant_bool(GPUShader *sh, const char *name, bool value)
{
GPU_shader_constant_bool_ex(sh, unwrap(sh)->interface->constant_get(name)->location, value);
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Uniforms / Resource location
* \{ */
@@ -555,6 +610,13 @@ int GPU_shader_get_uniform(GPUShader *shader, const char *name)
return uniform ? uniform->location : -1;
}

int GPU_shader_get_constant(GPUShader *shader, const char *name)
{
const ShaderInterface *interface = unwrap(shader)->interface;
const ShaderInput *constant = interface->constant_get(name);
return constant ? constant->location : -1;
}

int GPU_shader_get_builtin_uniform(GPUShader *shader, int builtin)
{
const ShaderInterface *interface = unwrap(shader)->interface;
@@ -122,6 +122,7 @@ void ShaderCreateInfo::finalize()
vertex_out_interfaces_.extend_non_duplicates(info.vertex_out_interfaces_);
geometry_out_interfaces_.extend_non_duplicates(info.geometry_out_interfaces_);
subpass_inputs_.extend_non_duplicates(info.subpass_inputs_);
specialization_constants_.extend_non_duplicates(info.specialization_constants_);

validate_vertex_attributes(&info);

@@ -279,6 +280,16 @@ std::string ShaderCreateInfo::check_error() const
" contains a stage interface using an instance name and mixed interpolation modes. "
"This is not compatible with Vulkan and need to be adjusted.\n";
}

/* Validate specialization constants. */
for (int i = 0; i < specialization_constants_.size(); i++) {
for (int j = i + 1; j < specialization_constants_.size(); j++) {
if (specialization_constants_[i].name == specialization_constants_[j].name) {
error += this->name_ + " contains two specialization constants with the name: " +
std::string(specialization_constants_[i].name);
}
}
}
#endif

return error;
@@ -161,6 +161,8 @@ static inline std::ostream &operator<<(std::ostream &stream, const Type type)
return stream << "short3";
case Type::SHORT4:
return stream << "short4";
case Type::BOOL:
return stream << "bool";
default:
BLI_assert(0);
return stream;

@@ -495,6 +497,34 @@ struct ShaderCreateInfo {
using SubpassIn = FragOut;
Vector<SubpassIn> subpass_inputs_;

struct SpecializationConstant {
struct Value {
union {
uint32_t u;
int32_t i;
float f;
};

bool operator==(const Value &other) const
{
return u == other.u;
}
};

Type type;
StringRefNull name;
Value default_value;

bool operator==(const SpecializationConstant &b) const
{
TEST_EQUAL(*this, b, type);
TEST_EQUAL(*this, b, name);
TEST_EQUAL(*this, b, default_value);
return true;
}
};
Vector<SpecializationConstant> specialization_constants_;

struct Sampler {
ImageType type;
GPUSamplerState sampler;
@@ -713,6 +743,62 @@ struct ShaderCreateInfo {

/** \} */

/* -------------------------------------------------------------------- */
/** \name Shader specialization constants
* \{ */

/* Adds a specialization constant which is a dynamically modifiable value, which will be
* statically compiled into a PSO configuration to provide optimal runtime performance,
* with a reduced re-compilation cost vs Macro's with easier generation of unique permutations
* based on run-time values.
*
* Tip: To evaluate use-cases of where specialization constants can provide a performance
* gain, benchmark a given shader in its default case. Attempt to statically disable branches or
* conditions which rely on uniform look-ups and measure if there is a marked improvement in
* performance and/or reduction in memory bandwidth/register pressure.
*
* NOTE: Specialization constants will incur new compilation of PSOs and thus can incur an
* unexpected cost. Specialization constants should be reserved for infrequently changing
* parameters (e.g. user setting parameters such as toggling of features or quality level
* presets), or those with a low set of possible runtime permutations.
*
* Specialization constants are assigned at runtime using:
* - `GPU_shader_constant_*(shader, name, value)`
* or
* - `DrawPass::specialize_constant(shader, name, value)`
*
* All constants **MUST** be specified before binding a shader.
*/
Self &specialization_constant(Type type, StringRefNull name, double default_value)
{
SpecializationConstant constant;
constant.type = type;
constant.name = name;
switch (type) {
case Type::INT:
constant.default_value.i = static_cast<int>(default_value);
break;
case Type::BOOL:
case Type::UINT:
constant.default_value.u = static_cast<uint>(default_value);
break;
case Type::FLOAT:
constant.default_value.f = static_cast<float>(default_value);
break;
default:
BLI_assert_msg(0, "Only scalar types can be used as constants");
break;
}
specialization_constants_.append(constant);
return *(Self *)this;
}

/* TODO: Add API to specify unique specialization config permutations in CreateInfo, allowing
* specialized compilation to be primed and handled in the background at start-up, rather than
* waiting for a given permutation to occur dynamically. */

/** \} */

/* -------------------------------------------------------------------- */
/** \name Resources bindings points
* \{ */
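A hedged example of the builder above (the info, constant, and source names are made up). Only the scalar `Type::INT`, `Type::UINT`, `Type::FLOAT` and `Type::BOOL` are accepted, anything else trips the assert, and the default is what gets folded in as long as the constant is never specialized.

    GPU_SHADER_CREATE_INFO(my_effect)
        /* Quality preset: a small set of runtime permutations, as recommended in the note above. */
        .specialization_constant(Type::INT, "quality_level", 1)
        /* Feature toggle, enabled by default until specialized otherwise. */
        .specialization_constant(Type::BOOL, "use_shadows", true)
        .fragment_source("my_effect_frag.glsl")
        .do_static_compilation(true);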
@@ -55,12 +55,17 @@ void ShaderInterface::sort_inputs()
/* Sorts all inputs inside their respective array.
* This is to allow fast hash collision detection.
* See `ShaderInterface::input_lookup` for more details. */

sort_input_list(MutableSpan<ShaderInput>(inputs_, attr_len_));
sort_input_list(MutableSpan<ShaderInput>(inputs_ + attr_len_, ubo_len_));
sort_input_list(MutableSpan<ShaderInput>(inputs_ + attr_len_ + ubo_len_, uniform_len_));
sort_input_list(
MutableSpan<ShaderInput>(inputs_ + attr_len_ + ubo_len_ + uniform_len_, ssbo_len_));
uint offset = 0;
sort_input_list(MutableSpan<ShaderInput>(inputs_ + offset, attr_len_));
offset += attr_len_;
sort_input_list(MutableSpan<ShaderInput>(inputs_ + offset, ubo_len_));
offset += ubo_len_;
sort_input_list(MutableSpan<ShaderInput>(inputs_ + offset, uniform_len_));
offset += uniform_len_;
sort_input_list(MutableSpan<ShaderInput>(inputs_ + offset, ssbo_len_));
offset += ssbo_len_;
sort_input_list(MutableSpan<ShaderInput>(inputs_ + offset, constant_len_));
offset += constant_len_;
}

void ShaderInterface::debug_print() const

@@ -48,7 +48,7 @@ class ShaderInterface {
friend shader::ShaderCreateInfo;
/* TODO(fclem): should be protected. */
public:
/** Flat array. In this order: Attributes, Ubos, Uniforms. */
/** Flat array. In this order: Attributes, Ubos, Uniforms, SSBOs, Constants. */
ShaderInput *inputs_ = nullptr;
/** Buffer containing all inputs names separated by '\0'. */
char *name_buffer_ = nullptr;

@@ -57,6 +57,7 @@ class ShaderInterface {
uint ubo_len_ = 0;
uint uniform_len_ = 0;
uint ssbo_len_ = 0;
uint constant_len_ = 0;
/** Enabled bind-points that needs to be fed with data. */
uint16_t enabled_attr_mask_ = 0;
uint16_t enabled_ubo_mask_ = 0;

@@ -117,6 +118,12 @@ class ShaderInterface {
return input_lookup(inputs_ + attr_len_ + ubo_len_ + uniform_len_, ssbo_len_, binding);
}

inline const ShaderInput *constant_get(const char *name) const
{
return input_lookup(
inputs_ + attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_, constant_len_, name);
}

inline const char *input_name_get(const ShaderInput *input) const
{
return name_buffer_ + input->name_offset;

@@ -16,6 +16,8 @@
#include "gpu_shader_interface.hh"
#include "gpu_vertex_buffer_private.hh"

#include "BLI_map.hh"

#include <string>

namespace blender {
@@ -32,6 +34,18 @@ class Shader {
/** Uniform & attribute locations for shader. */
ShaderInterface *interface = nullptr;

/**
* Specialization constants as a Struct-of-Arrays. Allow simpler comparison and reset.
* The backend is free to implement their support as they see fit.
*/
struct Constants {
using Value = shader::ShaderCreateInfo::SpecializationConstant::Value;
Vector<gpu::shader::Type> types;
/* Current values set by `GPU_shader_constant_*()` call. The backend can choose to interpret
* that however it wants (i.e: bind another shader instead). */
Vector<Value> values;
} constants;

protected:
/** For debugging purpose. */
char name[64];
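Storing the constants as parallel arrays of types and values keeps the comparison a backend needs trivial: the currently requested configuration matches an already-built pipeline if every value is bit-equal. A minimal sketch of that idea (illustrative only; the Metal backend's actual key is the `SpecializationStateDescriptor` shown later in this commit):

    /* Does the shader's current constant state match the values a cached pipeline was built with? */
    static bool matches_cached_permutation(const Shader::Constants &constants,
                                           const Vector<Shader::Constants::Value> &cached_values)
    {
      if (cached_values.size() != constants.values.size()) {
        return false;
      }
      for (int64_t i = 0; i < constants.values.size(); i++) {
        /* Raw-bit comparison is enough since Value is a scalar union. */
        if (constants.values[i].u != cached_values[i].u) {
          return false;
        }
      }
      return true;
    }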
@@ -68,6 +82,9 @@ class Shader {
virtual void uniform_float(int location, int comp_len, int array_size, const float *data) = 0;
virtual void uniform_int(int location, int comp_len, int array_size, const int *data) = 0;

/* Add specialization constant declarations to shader instance. */
void specialization_constants_init(const shader::ShaderCreateInfo &info);

std::string defines_declare(const shader::ShaderCreateInfo &info) const;
virtual std::string resources_declare(const shader::ShaderCreateInfo &info) const = 0;
virtual std::string vertex_interface_declare(const shader::ShaderCreateInfo &info) const = 0;
@@ -839,13 +839,13 @@ class MTLContext : public Context {
const MTLRenderPipelineStateInstance *pipeline_state_instance);
bool ensure_buffer_bindings(id<MTLComputeCommandEncoder> rec,
const MTLShaderInterface *shader_interface,
const MTLComputePipelineStateInstance &pipeline_state_instance);
const MTLComputePipelineStateInstance *pipeline_state_instance);
void ensure_texture_bindings(id<MTLRenderCommandEncoder> rec,
MTLShaderInterface *shader_interface,
const MTLRenderPipelineStateInstance *pipeline_state_instance);
void ensure_texture_bindings(id<MTLComputeCommandEncoder> rec,
MTLShaderInterface *shader_interface,
const MTLComputePipelineStateInstance &pipeline_state_instance);
const MTLComputePipelineStateInstance *pipeline_state_instance);
void ensure_depth_stencil_state(MTLPrimitiveType prim_type);

id<MTLBuffer> get_null_buffer();

@@ -854,7 +854,8 @@ class MTLContext : public Context {
void free_dummy_resources();

/* Compute. */
bool ensure_compute_pipeline_state();
/* Ensure compute pipeline state for current config is compiled and return PSO instance. */
const MTLComputePipelineStateInstance *ensure_compute_pipeline_state();
void compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len);
void compute_dispatch_indirect(StorageBuf *indirect_buf);

@@ -1399,7 +1399,7 @@ bool MTLContext::ensure_buffer_bindings(
bool MTLContext::ensure_buffer_bindings(
id<MTLComputeCommandEncoder> /*rec*/,
const MTLShaderInterface *shader_interface,
const MTLComputePipelineStateInstance &pipeline_state_instance)
const MTLComputePipelineStateInstance *pipeline_state_instance)
{
/* Fetch Compute Pass state. */
MTLComputeState &cs = this->main_command_buffer.get_compute_state();

@@ -1411,7 +1411,7 @@ bool MTLContext::ensure_buffer_bindings(
/* Fetch uniform buffer base binding index from pipeline_state_instance - There buffer index
* will be offset by the number of bound VBOs. */
uint32_t block_size = push_constant_block.size;
uint32_t buffer_index = pipeline_state_instance.base_uniform_buffer_index +
uint32_t buffer_index = pipeline_state_instance->base_uniform_buffer_index +
push_constant_block.buffer_index;
BLI_assert(buffer_index >= 0 && buffer_index < MTL_MAX_BUFFER_BINDINGS);

@@ -1468,7 +1468,7 @@ bool MTLContext::ensure_buffer_bindings(
shader_interface->get_name(),
shader_interface->get_name_at_offset(ubo.name_offset),
ubo_location,
pipeline_state_instance.base_uniform_buffer_index + buffer_index);
pipeline_state_instance->base_uniform_buffer_index + buffer_index);
bind_dummy_buffer = true;
}

@@ -1479,7 +1479,7 @@ bool MTLContext::ensure_buffer_bindings(
}

if (ubo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance.base_uniform_buffer_index +
uint32_t buffer_bind_index = pipeline_state_instance->base_uniform_buffer_index +
buffer_index;

/* Bind Compute UBO. */

@@ -1533,7 +1533,7 @@ bool MTLContext::ensure_buffer_bindings(
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance.base_storage_buffer_index + buffer_index);
pipeline_state_instance->base_storage_buffer_index + buffer_index);

#if DEBUG_BIND_NULL_BUFFER_FOR_MISSING_SSBO == 1
ssbo_buffer = this->get_null_buffer();

@@ -1542,7 +1542,7 @@ bool MTLContext::ensure_buffer_bindings(
}

if (ssbo_buffer != nil) {
uint32_t buffer_bind_index = pipeline_state_instance.base_storage_buffer_index +
uint32_t buffer_bind_index = pipeline_state_instance->base_storage_buffer_index +
buffer_index;

/* Bind Compute SSBO. */

@@ -1559,7 +1559,7 @@ bool MTLContext::ensure_buffer_bindings(
shader_interface->get_name(),
shader_interface->get_name_at_offset(ssbo.name_offset),
ssbo_location,
pipeline_state_instance.base_storage_buffer_index + buffer_index);
pipeline_state_instance->base_storage_buffer_index + buffer_index);
}
}
}

@@ -1823,7 +1823,7 @@ void MTLContext::ensure_texture_bindings(
void MTLContext::ensure_texture_bindings(
id<MTLComputeCommandEncoder> rec,
MTLShaderInterface *shader_interface,
const MTLComputePipelineStateInstance &pipeline_state_instance)
const MTLComputePipelineStateInstance *pipeline_state_instance)
{
BLI_assert(shader_interface != nil);
BLI_assert(rec != nil);

@@ -1988,13 +1988,13 @@ void MTLContext::ensure_texture_bindings(
* The first N slots, prior to `pipeline_state_instance->base_uniform_buffer_index` are
* used by vertex and index buffer bindings, and the number of buffers present will vary
* between PSOs. */
int arg_buffer_idx = (pipeline_state_instance.base_uniform_buffer_index +
int arg_buffer_idx = (pipeline_state_instance->base_uniform_buffer_index +
compute_arg_buffer_bind_index);
assert(arg_buffer_idx < 32);
id<MTLArgumentEncoder> argument_encoder = shader_interface->find_argument_encoder(
arg_buffer_idx);
if (argument_encoder == nil) {
argument_encoder = [pipeline_state_instance.compute
argument_encoder = [pipeline_state_instance->compute
newArgumentEncoderWithBufferIndex:arg_buffer_idx];
shader_interface->insert_argument_encoder(arg_buffer_idx, argument_encoder);
}

@@ -2038,7 +2038,7 @@ void MTLContext::ensure_texture_bindings(
}

BLI_assert(encoder_buffer != nullptr);
int compute_buffer_index = (pipeline_state_instance.base_uniform_buffer_index +
int compute_buffer_index = (pipeline_state_instance->base_uniform_buffer_index +
compute_arg_buffer_bind_index);
cs.bind_compute_buffer(encoder_buffer->get_metal_buffer(), 0, compute_buffer_index);
}
@@ -2185,19 +2185,19 @@ void MTLContext::ensure_depth_stencil_state(MTLPrimitiveType prim_type)
/** \name Compute dispatch.
* \{ */

bool MTLContext::ensure_compute_pipeline_state()
const MTLComputePipelineStateInstance *MTLContext::ensure_compute_pipeline_state()
{
/* Verify if bound shader is valid and fetch MTLComputePipelineStateInstance. */
/* Check if an active shader is bound. */
if (!this->pipeline_state.active_shader) {
MTL_LOG_WARNING("No Metal shader bound!");
return false;
return nullptr;
}
/* Also ensure active shader is valid. */
if (!this->pipeline_state.active_shader->is_valid()) {
MTL_LOG_WARNING(
"Bound active shader is not valid (Missing/invalid implementation for Metal).", );
return false;
return nullptr;
}
/* Verify this is a compute shader. */

@@ -2205,24 +2205,24 @@ bool MTLContext::ensure_compute_pipeline_state()
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
if (shader_interface == nullptr) {
MTL_LOG_WARNING("Bound active shader does not have a valid shader interface!", );
return false;
return nullptr;
}

bool success = this->pipeline_state.active_shader->bake_compute_pipeline_state(this);
const MTLComputePipelineStateInstance &compute_pso_inst =
this->pipeline_state.active_shader->get_compute_pipeline_state();
if (!success || compute_pso_inst.pso == nil) {
const MTLComputePipelineStateInstance *compute_pso_inst =
this->pipeline_state.active_shader->bake_compute_pipeline_state(this);
if (compute_pso_inst == nullptr || compute_pso_inst->pso == nil) {
MTL_LOG_WARNING("No valid compute PSO for compute dispatch!", );
return false;
return nullptr;
}
return true;
return compute_pso_inst;
}

void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups_z_len)
{
/* Ensure all resources required by upcoming compute submission are correctly bound to avoid
* out of bounds reads/writes. */
if (!this->ensure_compute_pipeline_state()) {
const MTLComputePipelineStateInstance *compute_pso_inst = this->ensure_compute_pipeline_state();
if (compute_pso_inst == nullptr) {
return;
}

@@ -2232,8 +2232,7 @@ void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups

/* Shader instance. */
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
const MTLComputePipelineStateInstance &compute_pso_inst =
this->pipeline_state.active_shader->get_compute_pipeline_state();
BLI_assert(compute_pso_inst != nullptr);

/* Begin compute encoder. */
id<MTLComputeCommandEncoder> compute_encoder =

@@ -2242,7 +2241,7 @@ void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups

/* Bind PSO. */
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
cs.bind_pso(compute_pso_inst.pso);
cs.bind_pso(compute_pso_inst->pso);

/** Ensure resource bindings. */
/* Texture Bindings. */

@@ -2258,12 +2257,14 @@ void MTLContext::compute_dispatch(int groups_x_len, int groups_y_len, int groups
this->ensure_buffer_bindings(compute_encoder, shader_interface, compute_pso_inst);

/* Dispatch compute. */
const MTLComputePipelineStateCommon &compute_state_common =
this->pipeline_state.active_shader->get_compute_common_state();
[compute_encoder dispatchThreadgroups:MTLSizeMake(max_ii(groups_x_len, 1),
max_ii(groups_y_len, 1),
max_ii(groups_z_len, 1))
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
threadsPerThreadgroup:MTLSizeMake(compute_state_common.threadgroup_x_len,
compute_state_common.threadgroup_y_len,
compute_state_common.threadgroup_z_len)];
#if MTL_DEBUG_SINGLE_DISPATCH_PER_ENCODER == 1
GPU_flush();
#endif
@@ -2277,54 +2278,55 @@ void MTLContext::compute_dispatch_indirect(StorageBuf *indirect_buf)
|
||||
#endif
|
||||
|
||||
/* Ensure all resources required by upcoming compute submission are correctly bound. */
|
||||
if (this->ensure_compute_pipeline_state()) {
|
||||
/* Shader instance. */
|
||||
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
|
||||
const MTLComputePipelineStateInstance &compute_pso_inst =
|
||||
this->pipeline_state.active_shader->get_compute_pipeline_state();
|
||||
const MTLComputePipelineStateInstance *compute_pso_inst = this->ensure_compute_pipeline_state();
|
||||
BLI_assert(compute_pso_inst != nullptr);
|
||||
|
||||
/* Begin compute encoder. */
|
||||
id<MTLComputeCommandEncoder> compute_encoder =
|
||||
this->main_command_buffer.ensure_begin_compute_encoder();
|
||||
BLI_assert(compute_encoder != nil);
|
||||
/* Shader instance. */
|
||||
MTLShaderInterface *shader_interface = this->pipeline_state.active_shader->get_interface();
|
||||
|
||||
/* Bind PSO. */
|
||||
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
|
||||
cs.bind_pso(compute_pso_inst.pso);
/* Begin compute encoder. */
id<MTLComputeCommandEncoder> compute_encoder =
this->main_command_buffer.ensure_begin_compute_encoder();
BLI_assert(compute_encoder != nil);

/** Ensure resource bindings. */
/* Texture Bindings. */
/* We will iterate through all texture bindings on the context and determine if any of the
* active slots match those in our shader interface. If so, textures will be bound. */
if (shader_interface->get_total_textures() > 0) {
this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst);
}
/* Bind PSO. */
MTLComputeState &cs = this->main_command_buffer.get_compute_state();
cs.bind_pso(compute_pso_inst->pso);

/* Bind buffers.
* NOTE: `ensure_buffer_bindings` must be called after `ensure_texture_bindings` to allow
* for binding of buffer-backed texture's data buffer and metadata. */
this->ensure_buffer_bindings(compute_encoder, shader_interface, compute_pso_inst);

/* Indirect Dispatch compute. */
MTLStorageBuf *mtlssbo = static_cast<MTLStorageBuf *>(indirect_buf);
id<MTLBuffer> mtl_indirect_buf = mtlssbo->get_metal_buffer();
BLI_assert(mtl_indirect_buf != nil);
if (mtl_indirect_buf == nil) {
MTL_LOG_WARNING("Metal Indirect Compute dispatch storage buffer does not exist.");
return;
}

/* Indirect Compute dispatch. */
[compute_encoder
dispatchThreadgroupsWithIndirectBuffer:mtl_indirect_buf
indirectBufferOffset:0
threadsPerThreadgroup:MTLSizeMake(compute_pso_inst.threadgroup_x_len,
compute_pso_inst.threadgroup_y_len,
compute_pso_inst.threadgroup_z_len)];
#if MTL_DEBUG_SINGLE_DISPATCH_PER_ENCODER == 1
GPU_flush();
#endif
/** Ensure resource bindings. */
/* Texture Bindings. */
/* We will iterate through all texture bindings on the context and determine if any of the
* active slots match those in our shader interface. If so, textures will be bound. */
if (shader_interface->get_total_textures() > 0) {
this->ensure_texture_bindings(compute_encoder, shader_interface, compute_pso_inst);
}

/* Bind buffers.
* NOTE: `ensure_buffer_bindings` must be called after `ensure_texture_bindings` to allow
* for binding of buffer-backed texture's data buffer and metadata. */
this->ensure_buffer_bindings(compute_encoder, shader_interface, compute_pso_inst);

/* Indirect Dispatch compute. */
MTLStorageBuf *mtlssbo = static_cast<MTLStorageBuf *>(indirect_buf);
id<MTLBuffer> mtl_indirect_buf = mtlssbo->get_metal_buffer();
BLI_assert(mtl_indirect_buf != nil);
if (mtl_indirect_buf == nil) {
MTL_LOG_WARNING("Metal Indirect Compute dispatch storage buffer does not exist.");
return;
}

/* Indirect Compute dispatch. */
const MTLComputePipelineStateCommon &compute_state_common =
this->pipeline_state.active_shader->get_compute_common_state();
[compute_encoder
dispatchThreadgroupsWithIndirectBuffer:mtl_indirect_buf
indirectBufferOffset:0
threadsPerThreadgroup:MTLSizeMake(compute_state_common.threadgroup_x_len,
compute_state_common.threadgroup_y_len,
compute_state_common.threadgroup_z_len)];
#if MTL_DEBUG_SINGLE_DISPATCH_PER_ENCODER == 1
GPU_flush();
#endif
}

/** \} */
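For context on the indirect path above: `dispatchThreadgroupsWithIndirectBuffer` reads the threadgroup grid dimensions from the bound storage buffer at the given offset, while `threadsPerThreadgroup` still comes from CPU-side state (now the shared `MTLComputePipelineStateCommon`). A minimal sketch of the layout Metal expects inside `mtl_indirect_buf`, matching the standard Metal indirect-dispatch arguments (already defined by the Metal headers, reproduced here for reference only):

typedef struct {
  uint32_t threadgroupsPerGrid[3]; /* Grid size in threadgroups, consumed by the GPU at dispatch time. */
} MTLDispatchThreadgroupsIndirectArguments;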
@@ -11,6 +11,10 @@

#include <Metal/Metal.h>

#include "BLI_vector.hh"

#include "gpu_shader_private.hh"

namespace blender::gpu {

/**
@@ -171,6 +175,29 @@ struct MTLVertexDescriptor {
}
};

struct SpecializationStateDescriptor {
Vector<Shader::Constants::Value> values;

SpecializationStateDescriptor() = default;
SpecializationStateDescriptor(Vector<Shader::Constants::Value> source) : values(source) {}

bool operator==(const SpecializationStateDescriptor &other) const
{
return values == other.values;
}

uint64_t hash() const
{
uint64_t hash = values.size();
uint seed = 0xFF;
for (const Shader::Constants::Value &value : values) {
seed = seed << 1;
hash ^= seed ^ value.u;
}
return hash;
}
};
/* Metal Render Pipeline State Descriptor -- All unique information which feeds PSO creation. */
struct MTLRenderPipelineStateDescriptor {
/* This state descriptor will contain ALL parameters which generate a unique PSO.
@@ -178,9 +205,10 @@ struct MTLRenderPipelineStateDescriptor {
* new PSO for the current shader.
*
* Unlike the 'MTLContextGlobalShaderPipelineState', this struct contains a subset of
* parameters used to distinguish between unique PSOs. This struct is hash-able and only contains
* those parameters which are required by PSO generation. Non-unique state such as bound
* resources is not tracked here, as it does not require a unique PSO permutation if changed. */
* parameters used to distinguish between unique PSOs. This struct is hash-able and only
* contains those parameters which are required by PSO generation. Non-unique state such as
* bound resources is not tracked here, as it does not require a unique PSO permutation if
* changed. */

/* Input Vertex Descriptor. */
MTLVertexDescriptor vertex_descriptor;
@@ -210,6 +238,9 @@ struct MTLRenderPipelineStateDescriptor {
/* Point size required by point primitives. */
float point_size = 0.0f;

/* Specialization constants map. */
SpecializationStateDescriptor specialization_state;

/* Comparison Operator for caching. */
bool operator==(const MTLRenderPipelineStateDescriptor &other) const
{
@@ -244,6 +275,10 @@ struct MTLRenderPipelineStateDescriptor {
}
}

if (!(specialization_state == other.specialization_state)) {
return false;
}

return true;
}

@@ -284,6 +319,9 @@ struct MTLRenderPipelineStateDescriptor {
/* Clipping plane enablement. */
hash ^= uint64_t(clipping_plane_enable_mask) << 20;

/* Specialization constants. We can treat the raw bytes as uint. */
hash ^= specialization_state.hash();

return hash;
}

@@ -302,4 +340,23 @@ struct MTLRenderPipelineStateDescriptor {
}
};

/* Metal Compute Pipeline State Descriptor containing all unique information which feeds PSO
* creation. */
struct MTLComputePipelineStateDescriptor {

/* Specialization constants map. */
SpecializationStateDescriptor specialization_state;

/* Comparison Operator for caching. */
bool operator==(const MTLComputePipelineStateDescriptor &other) const
{
return (specialization_state == other.specialization_state);
}

uint64_t hash() const
{
return specialization_state.hash();
}
};

} // namespace blender::gpu
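For reference, a minimal sketch of how these hashable descriptors behave as cache keys (the `cache` map and `bake_specialized_pso` helper below are hypothetical stand-ins; the real lookup appears in `MTLShader::bake_compute_pipeline_state` further down): descriptors built from identical constant values compare equal and hash identically, so a new PSO is only compiled for a genuinely new specialization configuration.

/* Sketch only: assumes `shader` is an MTLShader and `bake_specialized_pso` is a placeholder. */
blender::Map<MTLComputePipelineStateDescriptor, MTLComputePipelineStateInstance *> cache;

MTLComputePipelineStateDescriptor key;
key.specialization_state = {shader->constants.values};

MTLComputePipelineStateInstance **hit = cache.lookup_ptr(key);
MTLComputePipelineStateInstance *pso = hit ? *hit : nullptr;
if (pso == nullptr) {
  pso = bake_specialized_pso(key); /* Compile a specialized variant only on a cache miss. */
  cache.add(key, pso);
}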
@@ -48,6 +48,13 @@ class MTLContext;
# define shader_debug_printf(...) /* Null print. */
#endif

/* Offset base specialization constant ID for function constants declared in CreateInfo. */
#define MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID 30
/* Maximum threshold for specialized shader variant count.
* This is a catch-all to prevent excessive PSO permutations from being created and also catch
* parameters which should ideally not be used for specialization. */
#define MTL_SHADER_MAX_SPECIALIZED_PSOS 5

/* Desired reflection data for a buffer binding. */
struct MTLBufferArgumentData {
uint32_t index;
@@ -92,19 +99,10 @@ struct MTLRenderPipelineStateInstance {
blender::Vector<MTLBufferArgumentData> buffer_bindings_reflection_data_frag;
};
/* Metal Compute Pipeline State instance. */
struct MTLComputePipelineStateInstance {
/* Function instances with specialization.
* Required for argument encoder construction. */
id<MTLFunction> compute = nil;
/* PSO handle. */
id<MTLComputePipelineState> pso = nil;
/* Base bind index for binding uniform buffers, offset based on other
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index = -1;
/* Base bind index for binding storage buffers. */
int base_storage_buffer_index = -1;
/* Common compute pipeline state. */
struct MTLComputePipelineStateCommon {

/* Threadgroup information is common for all PSO variants.*/
int threadgroup_x_len = 1;
int threadgroup_y_len = 1;
int threadgroup_z_len = 1;
@@ -119,6 +117,25 @@ struct MTLComputePipelineStateInstance {
}
};

/* Metal Compute Pipeline State instance per PSO. */
struct MTLComputePipelineStateInstance {

/** Derived information. */
/* Unique index for PSO variant. */
uint32_t shader_pso_index;
/* Base bind index for binding uniform buffers, offset based on other
* bound buffers such as vertex buffers, as the count can vary. */
int base_uniform_buffer_index = -1;
/* Base bind index for binding storage buffers. */
int base_storage_buffer_index = -1;

/* Function instances with specialization.
* Required for argument encoder construction. */
id<MTLFunction> compute = nil;
/* PSO handle. */
id<MTLComputePipelineState> pso = nil;
};

/* #MTLShaderBuilder source wrapper used during initial compilation. */
struct MTLShaderBuilder {
NSString *msl_source_vert_ = @"";
@@ -195,7 +212,9 @@ class MTLShader : public Shader {
std::mutex pso_cache_lock_;

/** Compute pipeline state and Compute PSO caching. */
MTLComputePipelineStateInstance compute_pso_instance_;
MTLComputePipelineStateCommon compute_pso_common_state_;
blender::Map<MTLComputePipelineStateDescriptor, MTLComputePipelineStateInstance *>
compute_pso_cache_;

/* True to enable multi-layered rendering support. */
bool uses_gpu_layer = false;
@@ -350,9 +369,11 @@ class MTLShader : public Shader {
MTLPrimitiveTopologyClass prim_type,
const MTLRenderPipelineStateDescriptor &pipeline_descriptor);

bool bake_compute_pipeline_state(MTLContext *ctx);
const MTLComputePipelineStateInstance &get_compute_pipeline_state();

MTLComputePipelineStateInstance *bake_compute_pipeline_state(MTLContext *ctx);
const MTLComputePipelineStateCommon &get_compute_common_state()
{
return compute_pso_common_state_;
}
/* Transform Feedback. */
GPUVertBuf *get_transform_feedback_active_buffer();
bool has_transform_feedback_varying(std::string str);
@@ -144,17 +144,19 @@ MTLShader::~MTLShader()
delete pso_inst;
}
pso_cache_.clear();

/* Free Compute pipeline cache. */
for (const MTLComputePipelineStateInstance *pso_inst : compute_pso_cache_.values()) {
if (pso_inst->compute) {
[pso_inst->compute release];
}
if (pso_inst->pso) {
[pso_inst->pso release];
}
}
compute_pso_cache_.clear();
pso_cache_lock_.unlock();

/* Free Compute pipeline state object. */
if (compute_pso_instance_.compute) {
[compute_pso_instance_.compute release];
compute_pso_instance_.compute = nil;
}
if (compute_pso_instance_.pso) {
[compute_pso_instance_.pso release];
compute_pso_instance_.pso = nil;
}
/* NOTE(Metal): #ShaderInterface deletion is handled in the super destructor `~Shader()`. */
}
valid_ = false;
@@ -435,7 +437,8 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
push_constant_data_ = nullptr;
}

/* If this is a compute shader, bake PSO for compute straight-away. */
/* If this is a compute shader, bake base PSO for compute straight-away.
* NOTE: This will compile the base unspecialized variant. */
if (is_compute) {
this->bake_compute_pipeline_state(context_);
}
@@ -447,11 +450,6 @@ bool MTLShader::finalize(const shader::ShaderCreateInfo *info)
return true;
}

const MTLComputePipelineStateInstance &MTLShader::get_compute_pipeline_state()
{
return this->compute_pso_instance_;
}

void MTLShader::transform_feedback_names_set(Span<const char *> name_list,
const eGPUShaderTFBType geom_type)
{
@@ -736,6 +734,44 @@ void MTLShader::set_interface(MTLShaderInterface *interface)
/** \} */

/* -------------------------------------------------------------------- */
/** \name Shader specialization common utilities.
*
* \{ */

/**
* Populates `values` with the given `SpecializationStateDescriptor` values.
*/
static void populate_specialization_constant_values(
MTLFunctionConstantValues *values,
const Shader::Constants &shader_constants,
const SpecializationStateDescriptor &specialization_descriptor)
{
for (auto i : shader_constants.types.index_range()) {
const Shader::Constants::Value &value = specialization_descriptor.values[i];

uint index = i + MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID;
switch (shader_constants.types[i]) {
case Type::INT:
[values setConstantValue:&value.i type:MTLDataTypeInt atIndex:index];
break;
case Type::UINT:
[values setConstantValue:&value.u type:MTLDataTypeUInt atIndex:index];
break;
case Type::BOOL:
[values setConstantValue:&value.u type:MTLDataTypeBool atIndex:index];
break;
case Type::FLOAT:
[values setConstantValue:&value.f type:MTLDataTypeFloat atIndex:index];
break;
default:
BLI_assert_msg(false, "Unsupported custom constant type.");
break;
}
}
}
/** \} */
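For reference, a minimal sketch of how the populated `MTLFunctionConstantValues` get consumed when creating a specialized function variant. The creation call itself sits outside the hunks shown here, so `descriptor` and the `@"main0"` entry-point name below are placeholders; the Metal API call is the standard `newFunctionWithName:constantValues:error:`.

NSError *error = nil;
MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];
populate_specialization_constant_values(values, this->constants, descriptor.specialization_state);

/* Constants are resolved (and can be constant-folded) when the specialized MTLFunction is created. */
id<MTLFunction> func = [shader_library_compute_ newFunctionWithName:@"main0" constantValues:values error:&error];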
/* -------------------------------------------------------------------- */
/** \name Bake Pipeline State Objects
* \{ */
@@ -828,6 +864,9 @@ MTLRenderPipelineStateInstance *MTLShader::bake_current_pipeline_state(
pipeline_descriptor.vertex_descriptor.prim_topology_class =
(requires_specific_topology_class) ? prim_type : MTLPrimitiveTopologyClassUnspecified;

/* Specialization configuration. */
pipeline_descriptor.specialization_state = {this->constants.values};

/* Bake pipeline state using global descriptor. */
return bake_pipeline_state(ctx, prim_type, pipeline_descriptor);
}
@@ -854,6 +893,13 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
return pipeline_state;
}

/* TODO: When fetching a specialized variant of a shader, if this does not yet exist, verify
* whether the base unspecialized variant exists:
* - If unspecialized version exists: Compile specialized PSO asynchronously, returning base PSO
* and flagging state of specialization in cache as being built.
* - If unspecialized does NOT exist, build specialized version straight away, as we pay the
* cost of compilation in both cases regardless. */

/* Generate new Render Pipeline State Object (PSO). */
@autoreleasepool {
/* Prepare Render Pipeline Descriptor. */
@@ -862,6 +908,10 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
* generated code based on current render pipeline configuration. */
MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];

/* Custom function constant values: */
populate_specialization_constant_values(
values, this->constants, pipeline_descriptor.specialization_state);

/* Prepare Vertex descriptor based on current pipeline vertex binding state. */
MTLRenderPipelineDescriptor *desc = pso_descriptor_;
[desc reset];
@@ -937,7 +987,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
{
shader_debug_printf(
"TODO(Metal): Shader %s needs to support internal format conversion\n",
mtl_interface->name);
mtl_interface->get_name());
}

/* Copy metal back-end attribute descriptor state into PSO descriptor.
@@ -1347,7 +1397,7 @@ MTLRenderPipelineStateInstance *MTLShader::bake_pipeline_state(
}
}
bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
MTLComputePipelineStateInstance *MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
{
/* NOTE(Metal): Bakes and caches a PSO for compute. */
BLI_assert(this);
@@ -1356,13 +1406,38 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
BLI_assert(this->is_valid());
BLI_assert(shader_library_compute_ != nil);

if (compute_pso_instance_.pso == nil) {
/* Evaluate descriptor for specialization constants. */
MTLComputePipelineStateDescriptor compute_pipeline_descriptor;

/* Specialization configuration.
* NOTE: If allow_specialized is disabled, we will build the base un-specialized variant. */
compute_pipeline_descriptor.specialization_state = {this->constants.values};

/* Check if current PSO exists in the cache. */
pso_cache_lock_.lock();
MTLComputePipelineStateInstance **pso_lookup = compute_pso_cache_.lookup_ptr(
compute_pipeline_descriptor);
MTLComputePipelineStateInstance *pipeline_state = (pso_lookup) ? *pso_lookup : nullptr;
pso_cache_lock_.unlock();

if (pipeline_state != nullptr) {
/* Return cached PSO state. */
BLI_assert(pipeline_state->pso != nil);
return pipeline_state;
}
else {
/* Prepare Compute Pipeline Descriptor. */

/* Setup function specialization constants, used to modify and optimize
* generated code based on current render pipeline configuration. */
MTLFunctionConstantValues *values = [[MTLFunctionConstantValues new] autorelease];

/* TODO: Compile specialized shader variants asynchronously. */

/* Custom function constant values: */
populate_specialization_constant_values(
values, this->constants, compute_pipeline_descriptor.specialization_state);

/* Offset the bind index for Uniform buffers such that they begin after the VBO
* buffer bind slots. `MTL_uniform_buffer_base_index` is passed as a function
* specialization constant, customized per unique pipeline state permutation.
@@ -1403,7 +1478,7 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
if ([[error localizedDescription] rangeOfString:@"Compilation succeeded"].location ==
NSNotFound) {
BLI_assert(false);
return false;
return nullptr;
}
}
@@ -1421,13 +1496,13 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
if (error) {
NSLog(@"Failed to create PSO for compute shader: %s error %@\n", this->name, error);
BLI_assert(false);
return false;
return nullptr;
}
else if (!pso) {
NSLog(@"Failed to create PSO for compute shader: %s, but no error was provided!\n",
this->name);
BLI_assert(false);
return false;
return nullptr;
}
else {
#if 0
@@ -1438,12 +1513,19 @@ bool MTLShader::bake_compute_pipeline_state(MTLContext *ctx)
}

/* Gather reflection data and create MTLComputePipelineStateInstance to store results. */
compute_pso_instance_.compute = [compute_function retain];
compute_pso_instance_.pso = [pso retain];
compute_pso_instance_.base_uniform_buffer_index = MTL_uniform_buffer_base_index;
compute_pso_instance_.base_storage_buffer_index = MTL_storage_buffer_base_index;
MTLComputePipelineStateInstance *compute_pso_instance = new MTLComputePipelineStateInstance();
compute_pso_instance->compute = [compute_function retain];
compute_pso_instance->pso = [pso retain];
compute_pso_instance->base_uniform_buffer_index = MTL_uniform_buffer_base_index;
compute_pso_instance->base_storage_buffer_index = MTL_storage_buffer_base_index;

pso_cache_lock_.lock();
compute_pso_instance->shader_pso_index = compute_pso_cache_.size();
compute_pso_cache_.add(compute_pipeline_descriptor, compute_pso_instance);
pso_cache_lock_.unlock();

return compute_pso_instance;
}
return true;
}
/** \} */
@@ -218,6 +218,15 @@ struct MSLUniform {
}
};

struct MSLConstant {
shader::Type type;
std::string name;

MSLConstant(shader::Type const_type, std::string const_name) : type(const_type), name(const_name)
{
}
};

struct MSLBufferBlock {
std::string type_name;
std::string name;
@@ -401,6 +410,8 @@ class MSLGeneratorInterface {
blender::Vector<MSLTextureResource> texture_samplers;
blender::Vector<MSLVertexInputAttribute> vertex_input_attributes;
blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings;
/* Specialization Constants. */
blender::Vector<MSLConstant> constants;
/* Fragment tile inputs. */
blender::Vector<MSLFragmentTileInputAttribute> fragment_tile_inputs;
/* Should match vertex outputs, but defined separately as

@@ -869,6 +869,18 @@ char *MSLGeneratorInterface::msl_patch_default_get()
return msl_patch_default;
}

/* Specialization constants will evaluate using a dynamic value if provided at PSO compile time. */
static void generate_specialization_constant_declarations(const shader::ShaderCreateInfo *info,
std::stringstream &ss)
{
uint index = MTL_SHADER_SPECIALIZATION_CONSTANT_BASE_ID;
for (const ShaderCreateInfo::SpecializationConstant &sc : info->specialization_constants_) {
/* TODO(Metal): Output specialization constant chain. */
ss << "constant " << sc.type << " " << sc.name << " [[function_constant(" << index << ")]];\n";
index++;
}
}
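As an illustration of what the loop above writes into the generated MSL source (a sketch assuming the base index of 30 and a create-info that declares a float constant `float_in` followed by a bool constant `bool_in`):

constant float float_in [[function_constant(30)]];
constant bool bool_in [[function_constant(31)]];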
bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
{
/* Verify if create-info is available.
@@ -1048,6 +1060,10 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
std::stringstream ss_vertex;
std::stringstream ss_fragment;

/* Generate specialization constants. */
generate_specialization_constant_declarations(info, ss_vertex);
generate_specialization_constant_declarations(info, ss_fragment);

/*** Generate VERTEX Stage ***/
/* Conditional defines. */
if (msl_iface.use_argument_buffer_for_samplers()) {
@@ -1506,6 +1522,8 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
ss_compute << "#define GPU_ARB_texture_cube_map_array 1\n"
"#define GPU_ARB_shader_draw_parameters 1\n";

generate_specialization_constant_declarations(info, ss_compute);

#ifndef NDEBUG
extract_global_scope_constants(shd_builder_->glsl_compute_source_, ss_compute);
#endif
@@ -1664,7 +1682,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
this->set_interface(msl_iface.bake_shader_interface(this->name));

/* Compute dims. */
this->compute_pso_instance_.set_compute_workgroup_size(
this->compute_pso_common_state_.set_compute_workgroup_size(
max_ii(info->compute_layout_.local_size_x, 1),
max_ii(info->compute_layout_.local_size_y, 1),
max_ii(info->compute_layout_.local_size_z, 1));
@@ -1792,6 +1810,11 @@ void MSLGeneratorInterface::prepare_from_createinfo(const shader::ShaderCreateIn
uniforms.append(uniform);
}

/** Prepare Constants. */
for (const auto &constant : create_info_->specialization_constants_) {
constants.append(MSLConstant(constant.type, constant.name));
}

/* Prepare textures and uniform blocks.
* Perform across both resource categories and extract both
* texture samplers and image types. */
@@ -3722,6 +3745,12 @@ MTLShaderInterface *MSLGeneratorInterface::bake_shader_interface(const char *nam
tex_buf_ssbo_location);
}

/* Specialization Constants. */
for (const MSLConstant &constant : this->constants) {
interface->add_constant(name_buffer_copystr(
&interface->name_buffer_, constant.name.c_str(), name_buffer_size, name_buffer_offset));
}

/* Sampler Parameters. */
interface->set_sampler_properties(
this->use_argument_buffer_for_samplers(),
@@ -130,6 +130,10 @@ struct MTLShaderUniform {
uint32_t array_len;
};

struct MTLShaderConstant {
uint32_t name_offset;
};

struct MTLShaderTexture {
bool used;
uint32_t name_offset;
@@ -199,6 +203,10 @@ class MTLShaderInterface : public ShaderInterface {
int max_texture_index_;
MTLShaderTexture textures_[MTL_MAX_TEXTURE_SLOTS];

/* Specialization constants. */
uint32_t total_constants_;
Vector<MTLShaderConstant> constants_;

/* Whether argument buffers are used for sampler bindings. */
bool sampler_use_argument_buffer_;
int sampler_argument_buffer_bind_index_[3];
@@ -241,6 +249,7 @@ class MTLShaderInterface : public ShaderInterface {
ShaderStage stage_mask = ShaderStage::FRAGMENT,
int tex_buffer_ssbo_location = -1);
void add_push_constant_block(uint32_t name_offset);
void add_constant(uint32_t name_offset);

/* Resolve and cache locations of builtin uniforms and uniform blocks. */
void map_builtins();
@@ -256,6 +265,9 @@ class MTLShaderInterface : public ShaderInterface {
const MTLShaderUniform &get_uniform(uint index) const;
uint32_t get_total_uniforms() const;

/* Fetch Constants. */
uint32_t get_total_constants() const;

/* Fetch Uniform Blocks. */
const MTLShaderBufferBlock &get_uniform_block(uint index) const;
uint32_t get_total_uniform_blocks() const;
@@ -56,6 +56,7 @@ const char *MTLShaderInterface::get_name_at_offset(uint32_t offset) const
void MTLShaderInterface::init()
{
total_attributes_ = 0;
total_constants_ = 0;
total_uniform_blocks_ = 0;
max_uniformbuf_index_ = 0;
total_storage_blocks_ = 0;
@@ -256,6 +257,14 @@ void MTLShaderInterface::add_texture(uint32_t name_offset,
}
}

void MTLShaderInterface::add_constant(uint32_t name_offset)
{
MTLShaderConstant constant;
constant.name_offset = name_offset;
constants_.append(constant);
total_constants_++;
}

void MTLShaderInterface::map_builtins()
{
/* Clear builtin arrays to NULL locations. */
@@ -311,10 +320,11 @@ void MTLShaderInterface::prepare_common_shader_inputs()
ubo_len_ = this->get_total_uniform_blocks();
uniform_len_ = this->get_total_uniforms() + this->get_total_textures();
ssbo_len_ = this->get_total_storage_blocks();
constant_len_ = this->get_total_constants();

/* Calculate total inputs and allocate #ShaderInput array. */
/* NOTE: We use the existing `name_buffer_` allocated for internal input structs. */
int input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_;
int input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_ + constant_len_;
inputs_ = (ShaderInput *)MEM_callocN(sizeof(ShaderInput) * input_tot_len, __func__);
ShaderInput *current_input = inputs_;
@@ -408,6 +418,17 @@ void MTLShaderInterface::prepare_common_shader_inputs()
current_input++;
}

/* Specialization Constants. */
BLI_assert(&inputs_[attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_] >= current_input);
current_input = &inputs_[attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_];
for (const int const_index : IndexRange(constant_len_)) {
MTLShaderConstant &shd_const = constants_[const_index];
current_input->name_offset = shd_const.name_offset;
current_input->name_hash = BLI_hash_string(this->get_name_at_offset(shd_const.name_offset));
current_input->location = const_index;
current_input++;
}

this->sort_inputs();

/* Map builtin uniform indices to uniform binding locations. */
@@ -460,6 +481,11 @@ uint32_t MTLShaderInterface::get_total_attributes() const
return total_attributes_;
}

uint32_t MTLShaderInterface::get_total_constants() const
{
return total_constants_;
}

uint32_t MTLShaderInterface::get_total_vertex_stride() const
{
return total_vert_stride_;
@@ -619,6 +619,29 @@ std::string GLShader::resources_declare(const ShaderCreateInfo &info) const
* are reused for local variables. This is to match other backend behavior which needs accessors
* macros. */

ss << "\n/* Specialization Constants (pass-through). */\n";
for (const ShaderCreateInfo::SpecializationConstant &sc : info.specialization_constants_) {
ss << "#define " << sc.name;
switch (sc.type) {
case Type::INT:
ss << " " << std::to_string(sc.default_value.i) << "\n";
break;
case Type::UINT:
ss << " " << std::to_string(sc.default_value.u) << "u\n";
break;
case Type::BOOL:
ss << " bool(" << std::to_string(sc.default_value.u) << ")\n";
break;
case Type::FLOAT:
/* Use uint representation to allow exact same bit pattern even if NaN. */
ss << " uintBitsToFloat(" << std::to_string(sc.default_value.u) << ")\n";
break;
default:
BLI_assert_unreachable();
break;
}
}
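For illustration, with the test constants declared further down (`float_in` defaulting to 2.0, `uint_in` to 3, `int_in` to 4 and `bool_in` to true), the emitted pass-through block would look roughly like this, 1073741824 being the bit pattern of 2.0f:

/* Specialization Constants (pass-through). */
#define float_in uintBitsToFloat(1073741824)
#define uint_in 3u
#define int_in 4
#define bool_in bool(1)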
ss << "\n/* Pass Resources. */\n";
for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
print_resource(ss, res, info.auto_resource_location_);

@@ -390,6 +390,7 @@ GLShaderInterface::GLShaderInterface(GLuint program, const shader::ShaderCreateI

attr_len_ = info.vertex_inputs_.size();
uniform_len_ = info.push_constants_.size();
constant_len_ = info.specialization_constants_.size();
ubo_len_ = 0;
ssbo_len_ = 0;

@@ -430,7 +431,7 @@ GLShaderInterface::GLShaderInterface(GLuint program, const shader::ShaderCreateI

BLI_assert_msg(ubo_len_ <= 16, "enabled_ubo_mask_ is uint16_t");

int input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_;
int input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_ + constant_len_;
inputs_ = (ShaderInput *)MEM_callocN(sizeof(ShaderInput) * input_tot_len, __func__);
ShaderInput *input = inputs_;

@@ -528,6 +529,14 @@ GLShaderInterface::GLShaderInterface(GLuint program, const shader::ShaderCreateI
}
}

/* Constants */
int constant_id = 0;
for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) {
copy_input_name(input, constant.name, name_buffer_, name_buffer_offset);
input->location = constant_id++;
input++;
}

this->sort_inputs();

/* Resolving builtins must happen after the inputs have been sorted. */
@@ -93,6 +93,29 @@ GPU_SHADER_CREATE_INFO(gpu_buffer_texture_test)
.compute_source("gpu_buffer_texture_test.glsl")
.do_static_compilation(true);

/* Specialization constants. */

GPU_SHADER_CREATE_INFO(gpu_specialization_constants_base_test)
.storage_buf(0, Qualifier::WRITE, "int", "data_out[]")
.specialization_constant(Type::FLOAT, "float_in", 2)
.specialization_constant(Type::UINT, "uint_in", 3)
.specialization_constant(Type::INT, "int_in", 4)
.specialization_constant(Type::BOOL, "bool_in", true);

GPU_SHADER_CREATE_INFO(gpu_compute_specialization_test)
.local_group_size(1)
.additional_info("gpu_specialization_constants_base_test")
.compute_source("gpu_specialization_test.glsl")
.do_static_compilation(true);

GPU_SHADER_CREATE_INFO(gpu_graphic_specialization_test)
.additional_info("gpu_specialization_constants_base_test")
.vertex_source("gpu_specialization_test.glsl")
.fragment_source("gpu_specialization_test.glsl")
.do_static_compilation(true);
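A minimal host-side usage sketch for these create-infos (the same calls the test added below exercises end-to-end; buffer binding and result checks are omitted here):

GPUShader *sh = GPU_shader_create_from_info_name("gpu_compute_specialization_test");

/* Override the declared defaults before the dispatch that should use the specialized variant. */
GPU_shader_constant_float(sh, "float_in", 52.0f);
GPU_shader_constant_bool(sh, "bool_in", false);

GPU_compute_dispatch(sh, 1, 1, 1);
GPU_shader_free(sh);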
/* EEVEE test. */

GPU_SHADER_CREATE_INFO(eevee_shadow_test)
.fragment_source("eevee_shadow_test.glsl")
.additional_info("gpu_shader_test")

@@ -0,0 +1,23 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */

#if defined(GPU_COMPUTE_SHADER) || defined(GPU_VERTEX_SHADER)

void main()
{
data_out[0] = int(float_in);
data_out[1] = int(uint_in);
data_out[2] = int(int_in);
data_out[3] = int(bool_in);

# if defined(GPU_VERTEX_SHADER)
gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
# endif
}

#else

void main() {}

#endif
source/blender/gpu/tests/specialization_constants_test.cc (new file)
@@ -0,0 +1,148 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: Apache-2.0 */

#include "testing/testing.h"

#include "GPU_batch.h"
#include "GPU_capabilities.h"
#include "GPU_compute.h"
#include "GPU_context.h"
#include "GPU_framebuffer.h"
#include "GPU_shader.h"
#include "GPU_storage_buffer.h"

#include "BLI_math_vector.hh"
#include "BLI_utility_mixins.hh"
#include "BLI_vector.hh"

#include "gpu_shader_create_info.hh"
#include "gpu_shader_create_info_private.hh"
#include "gpu_testing.hh"

namespace blender::gpu::tests {

struct ShaderSpecializationConst {
GPUShader *shader = nullptr;
GPUStorageBuf *ssbo = nullptr;
Vector<int> data;

float float_in;
uint uint_in;
int int_in;
bool bool_in;

bool is_graphic = false;
ShaderSpecializationConst(const char *info_name)
{
if (!GPU_compute_shader_support()) {
/* We can't test as the platform does not support compute shaders. */
std::cout << "Skipping test: platform not supported";
return;
}
GPU_render_begin();

this->init_shader(info_name);

GPU_storagebuf_bind(ssbo, GPU_shader_get_ssbo_binding(shader, "data_out"));

/* Expect defaults. */
float_in = 2;
uint_in = 3;
int_in = 4;
bool_in = true;

this->validate();

/* Test values. */
float_in = 52;
uint_in = 324;
int_in = 455;
bool_in = false;

GPU_shader_constant_float(shader, "float_in", float_in);
GPU_shader_constant_uint(shader, "uint_in", uint_in);
GPU_shader_constant_int(shader, "int_in", int_in);
GPU_shader_constant_bool(shader, "bool_in", bool_in);

this->validate();

GPU_render_end();
}

~ShaderSpecializationConst()
{
if (shader != nullptr) {
GPU_shader_unbind();
GPU_shader_free(shader);
}
if (ssbo != nullptr) {
GPU_storagebuf_free(ssbo);
}
}
void init_shader(const char *info_name)
{
using namespace blender::gpu::shader;

uint data_len = 4;
ssbo = GPU_storagebuf_create_ex(data_len * sizeof(int), nullptr, GPU_USAGE_STREAM, __func__);
data.resize(data_len);

const GPUShaderCreateInfo *_info = gpu_shader_create_info_get(info_name);
const ShaderCreateInfo &info = *reinterpret_cast<const ShaderCreateInfo *>(_info);
is_graphic = info.compute_source_.is_empty();
shader = GPU_shader_create_from_info_name(info_name);
EXPECT_NE(shader, nullptr);
}
void validate()
{
if (is_graphic) {
GPUFrameBuffer *fb = GPU_framebuffer_create("test_fb");
GPU_framebuffer_default_size(fb, 1, 1);
GPU_framebuffer_bind(fb);

/* TODO(fclem): remove this boilerplate. */
GPUVertFormat format{};
GPU_vertformat_attr_add(&format, "dummy", GPU_COMP_U32, 1, GPU_FETCH_INT);
GPUVertBuf *verts = GPU_vertbuf_create_with_format(&format);

GPU_vertbuf_data_alloc(verts, 1);
GPUBatch *batch = GPU_batch_create_ex(GPU_PRIM_POINTS, verts, nullptr, GPU_BATCH_OWNS_VBO);
GPU_batch_set_shader(batch, shader);
GPU_batch_draw_advanced(batch, 0, 1, 0, 1);
GPU_batch_discard(batch);

GPU_framebuffer_free(fb);
}
else {
GPU_compute_dispatch(shader, 1, 1, 1);
}

GPU_finish();
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
GPU_storagebuf_read(ssbo, data.data());

EXPECT_EQ(data[0], int(float_in));
EXPECT_EQ(data[1], int(uint_in));
EXPECT_EQ(data[2], int(int_in));
EXPECT_EQ(data[3], int(bool_in));
}
};
static void test_specialization_constants_compute()
{
ShaderSpecializationConst("gpu_compute_specialization_test");
}
GPU_TEST(specialization_constants_compute)

static void test_specialization_constants_graphic()
{
ShaderSpecializationConst("gpu_graphic_specialization_test");
}
GPU_TEST(specialization_constants_graphic)

} // namespace blender::gpu::tests
@@ -1001,6 +1001,30 @@ std::string VKShader::resources_declare(const shader::ShaderCreateInfo &info) co
interface.init(info);
std::stringstream ss;

/* TODO: Add support for specialization constants at compile time. */
ss << "\n/* Specialization Constants (pass-through). */\n";
for (const ShaderCreateInfo::SpecializationConstant &sc : info.specialization_constants_) {
ss << "#define " << sc.name;
switch (sc.type) {
case Type::INT:
ss << " " << std::to_string(sc.default_value.i) << "\n";
break;
case Type::UINT:
ss << " " << std::to_string(sc.default_value.u) << "u\n";
break;
case Type::BOOL:
ss << " bool(" << std::to_string(sc.default_value.u) << ")\n";
break;
case Type::FLOAT:
/* Use uint representation to allow exact same bit pattern even if NaN. */
ss << " uintBitsToFloat(" << std::to_string(sc.default_value.u) << ")\n";
break;
default:
BLI_assert_unreachable();
break;
}
}

ss << "\n/* Pass Resources. */\n";
for (const ShaderCreateInfo::Resource &res : info.pass_resources_) {
print_resource(ss, interface, res);
@@ -22,6 +22,7 @@ void VKShaderInterface::init(const shader::ShaderCreateInfo &info)

attr_len_ = info.vertex_inputs_.size();
uniform_len_ = info.push_constants_.size();
constant_len_ = info.specialization_constants_.size();
ssbo_len_ = 0;
ubo_len_ = 0;
image_offset_ = -1;
@@ -65,7 +66,7 @@ void VKShaderInterface::init(const shader::ShaderCreateInfo &info)
image_offset_ = image_max_binding + 1;
}

int32_t input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_;
int32_t input_tot_len = attr_len_ + ubo_len_ + uniform_len_ + ssbo_len_ + constant_len_;
inputs_ = static_cast<ShaderInput *>(
MEM_calloc_arrayN(input_tot_len, sizeof(ShaderInput), __func__));
ShaderInput *input = inputs_;
@@ -135,6 +136,14 @@ void VKShaderInterface::init(const shader::ShaderCreateInfo &info)
}
}

/* Constants */
int constant_id = 0;
for (const ShaderCreateInfo::SpecializationConstant &constant : info.specialization_constants_) {
copy_input_name(input, constant.name, name_buffer_, name_buffer_offset);
input->location = constant_id++;
input++;
}

sort_inputs();

/* Builtin Uniforms */