From fb3904ce45c6932387a373efe05dd8df57035a07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cle=CC=81ment=20Foucault?= Date: Mon, 22 Sep 2025 10:24:10 +0200 Subject: [PATCH] GPU: Shader Codegen: Split different graph includes This reduces the number of includes for each tree graph (surface, volume, displacement) and significantly reduces the code size for most vertex shaders, speeding up compile time. Rel #145347 Pull Request: https://projects.blender.org/blender/blender/pulls/146419 --- .../compositor/intern/shader_operation.cc | 7 +- .../draw/engines/eevee/eevee_shader.cc | 32 +++++--- source/blender/gpu/GPU_material.hh | 29 ++++++-- source/blender/gpu/intern/gpu_codegen.cc | 73 ++++++++----------- source/blender/gpu/intern/gpu_codegen.hh | 14 ++-- source/blender/gpu/intern/gpu_material.cc | 6 +- .../gpu/intern/gpu_material_library.hh | 7 +- source/blender/gpu/intern/gpu_node_graph.cc | 9 +-- source/blender/gpu/intern/gpu_node_graph.hh | 3 - source/blender/gpu/intern/gpu_pass.cc | 1 - .../gpu/intern/gpu_shader_dependency.cc | 12 ++- 11 files changed, 104 insertions(+), 89 deletions(-) diff --git a/source/blender/compositor/intern/shader_operation.cc b/source/blender/compositor/intern/shader_operation.cc index 18d3c7b4169..e2726b1cd70 100644 --- a/source/blender/compositor/intern/shader_operation.cc +++ b/source/blender/compositor/intern/shader_operation.cc @@ -549,12 +549,13 @@ void ShaderOperation::generate_code(void *thunk, eval_code += operation->generate_code_for_inputs(material, shader_create_info); - eval_code += code_generator_output->composite; + eval_code += code_generator_output->composite.serialized; eval_code += "}\n"; - shader_create_info.generated_sources.append( - {"gpu_shader_compositor_eval.glsl", shader_create_info.dependencies_generated, eval_code}); + shader_create_info.generated_sources.append({"gpu_shader_compositor_eval.glsl", + code_generator_output->composite.dependencies, + eval_code}); } /* Texture storers in the shader always take a 
[i]vec4 as an argument, so encode each type in an diff --git a/source/blender/draw/engines/eevee/eevee_shader.cc b/source/blender/draw/engines/eevee/eevee_shader.cc index da6948625b5..ff40f32fac6 100644 --- a/source/blender/draw/engines/eevee/eevee_shader.cc +++ b/source/blender/draw/engines/eevee/eevee_shader.cc @@ -903,17 +903,26 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut vert_gen << "float3 nodetree_displacement()\n"; vert_gen << "{\n"; - vert_gen << ((use_vertex_displacement) ? codegen.displacement : "return float3(0);\n"); + vert_gen << ((use_vertex_displacement) ? codegen.displacement.serialized : + "return float3(0);\n"); vert_gen << "}\n\n"; - Vector dependencies = {"eevee_nodetree_lib.glsl"}; - dependencies.extend(info.dependencies_generated); + Vector dependencies = {}; + if (use_vertex_displacement) { + dependencies.append("eevee_nodetree_lib.glsl"); + dependencies.extend(codegen.displacement.dependencies); + } info.generated_sources.append({"eevee_nodetree_vert_lib.glsl", dependencies, vert_gen.str()}); } if (pipeline_type != MAT_PIPE_VOLUME_OCCUPANCY) { - frag_gen << (!codegen.material_functions.empty() ? codegen.material_functions : "\n"); + Vector dependencies = {"eevee_nodetree_lib.glsl"}; + + for (const auto &graph : codegen.material_functions) { + frag_gen << graph.serialized; + dependencies.extend(graph.dependencies); + } if (!codegen.displacement.empty()) { /* Bump displacement. Needed to recompute normals after displacement. 
*/ @@ -921,14 +930,16 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut frag_gen << "float3 nodetree_displacement()\n"; frag_gen << "{\n"; - frag_gen << codegen.displacement; + frag_gen << codegen.displacement.serialized; + dependencies.extend(codegen.displacement.dependencies); frag_gen << "}\n\n"; } frag_gen << "Closure nodetree_surface(float closure_rand)\n"; frag_gen << "{\n"; frag_gen << " closure_weights_reset(closure_rand);\n"; - frag_gen << (!codegen.surface.empty() ? codegen.surface : "return Closure(0);\n"); + frag_gen << codegen.surface.serialized_or_default("return Closure(0);\n"); + dependencies.extend(codegen.surface.dependencies); frag_gen << "}\n\n"; /* TODO(fclem): Find a way to pass material parameters inside the material UBO. */ @@ -960,19 +971,18 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut } } else { - frag_gen << codegen.thickness; + frag_gen << codegen.thickness.serialized; + dependencies.extend(codegen.thickness.dependencies); } frag_gen << "}\n\n"; frag_gen << "Closure nodetree_volume()\n"; frag_gen << "{\n"; frag_gen << " closure_weights_reset(0.0);\n"; - frag_gen << (!codegen.volume.empty() ? 
codegen.volume : "return Closure(0);\n"); + frag_gen << codegen.volume.serialized_or_default("return Closure(0);\n"); + dependencies.extend(codegen.volume.dependencies); frag_gen << "}\n\n"; - Vector dependencies = {"eevee_nodetree_lib.glsl"}; - dependencies.extend(info.dependencies_generated); - info.generated_sources.append({"eevee_nodetree_frag_lib.glsl", dependencies, frag_gen.str()}); } diff --git a/source/blender/gpu/GPU_material.hh b/source/blender/gpu/GPU_material.hh index 039f9bde8ef..f86b09d7c13 100644 --- a/source/blender/gpu/GPU_material.hh +++ b/source/blender/gpu/GPU_material.hh @@ -10,6 +10,8 @@ #include +#include "BLI_set.hh" + #include "DNA_customdata_types.h" /* for eCustomDataType */ #include "DNA_image_types.h" #include "DNA_listBase.h" @@ -295,15 +297,30 @@ struct GPUNodeStack { bool end; }; +struct GPUGraphOutput { + std::string serialized; + blender::Vector dependencies; + + bool empty() const + { + return serialized.empty(); + } + + std::string serialized_or_default(std::string value) const + { + return serialized.empty() ? value : serialized; + } +}; + struct GPUCodegenOutput { std::string attr_load; /* Node-tree functions calls. 
*/ - std::string displacement; - std::string surface; - std::string volume; - std::string thickness; - std::string composite; - std::string material_functions; + GPUGraphOutput displacement; + GPUGraphOutput surface; + GPUGraphOutput volume; + GPUGraphOutput thickness; + GPUGraphOutput composite; + blender::Vector material_functions; GPUShaderCreateInfo *create_info; }; diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc index 9d241174356..b4466841dde 100644 --- a/source/blender/gpu/intern/gpu_codegen.cc +++ b/source/blender/gpu/intern/gpu_codegen.cc @@ -26,6 +26,7 @@ #include "GPU_vertex_format.hh" #include "gpu_codegen.hh" +#include "gpu_material_library.hh" #include "gpu_shader_dependency_private.hh" #include @@ -289,34 +290,12 @@ void GPUCodegen::generate_resources() info.typedef_source_generated = ss.str(); } -void GPUCodegen::generate_library() +void GPUCodegen::node_serialize(Set &used_libraries, + std::stringstream &eval_ss, + const GPUNode *node) { - GPUCodegenCreateInfo &info = *create_info; + gpu_material_library_use_function(used_libraries, node->name); - void *value; - Vector source_files; - - /* Iterate over libraries. We need to keep this struct intact in case it is required for the - * optimization pass. 
The first pass just collects the keys from the GSET, given items in a GSET - * are unordered this can cause order differences between invocations, so we collect the keys - * first, and sort them before doing actual work, to guarantee stable behavior while still - * having cheap insertions into the GSET */ - GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries); - while (!BLI_ghashIterator_done(ihash)) { - value = BLI_ghashIterator_getKey(ihash); - source_files.append((const char *)value); - BLI_ghashIterator_step(ihash); - } - BLI_ghashIterator_free(ihash); - - std::sort(source_files.begin(), source_files.end()); - for (auto &key : source_files) { - info.dependencies_generated.append_non_duplicates(key); - } -} - -void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node) -{ /* Declare constants. */ LISTBASE_FOREACH (GPUInput *, input, &node->inputs) { switch (input->source) { @@ -392,14 +371,26 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node) nodes_total_++; } -std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag, - GPUNodeLink *output_link, - const char *output_default) +static Vector set_to_vector_stable(Set &set) +{ + Vector source_files; + for (const StringRefNull &str : set) { + source_files.append(str); + } + /* Sort dependencies to avoid random order causing shader caching to fail (see #108289). 
*/ + std::sort(source_files.begin(), source_files.end()); + return source_files; +} + +GPUGraphOutput GPUCodegen::graph_serialize(GPUNodeTag tree_tag, + GPUNodeLink *output_link, + const char *output_default) { if (output_link == nullptr && output_default == nullptr) { - return ""; + return {}; } + Set used_libraries; std::stringstream eval_ss; bool has_nodes = false; /* NOTE: The node order is already top to bottom (or left to right in node editor) @@ -408,12 +399,12 @@ std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag, if ((node->tag & tree_tag) == 0) { continue; } - node_serialize(eval_ss, node); + node_serialize(used_libraries, eval_ss, node); has_nodes = true; } if (!has_nodes) { - return ""; + return {}; } if (output_link) { @@ -426,20 +417,21 @@ std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag, std::string str = eval_ss.str(); BLI_hash_mm2a_add(&hm2a_, reinterpret_cast(str.c_str()), str.size()); - return str; + return {str, set_to_vector_stable(used_libraries)}; } -std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag) +GPUGraphOutput GPUCodegen::graph_serialize(GPUNodeTag tree_tag) { std::stringstream eval_ss; + Set used_libraries; LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) { if (node->tag & tree_tag) { - node_serialize(eval_ss, node); + node_serialize(used_libraries, eval_ss, node); } } std::string str = eval_ss.str(); BLI_hash_mm2a_add(&hm2a_, reinterpret_cast(str.c_str()), str.size()); - return str; + return {str, set_to_vector_stable(used_libraries)}; } void GPUCodegen::generate_cryptomatte() @@ -507,19 +499,18 @@ void GPUCodegen::generate_graphs() } if (!BLI_listbase_is_empty(&graph.material_functions)) { - std::stringstream eval_ss; - eval_ss << "\n/* Generated Functions */\n\n"; LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, func_link, &graph.material_functions) { + std::stringstream eval_ss; /* Untag every node in the graph to avoid serializing nodes from other functions */ LISTBASE_FOREACH (GPUNode *, node, 
&graph.nodes) { node->tag &= ~GPU_NODE_TAG_FUNCTION; } /* Tag only the nodes needed for the current function */ gpu_nodes_tag(func_link->outlink, GPU_NODE_TAG_FUNCTION); - const std::string fn = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink); - eval_ss << "float " << func_link->name << "() {\n" << fn << "}\n\n"; + GPUGraphOutput graph = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink); + eval_ss << "float " << func_link->name << "() {\n" << graph.serialized << "}\n\n"; + output.material_functions.append({eval_ss.str(), graph.dependencies}); } - output.material_functions = eval_ss.str(); /* Leave the function tags as they were before serialization */ LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, funclink, &graph.material_functions) { gpu_nodes_tag(funclink->outlink, GPU_NODE_TAG_FUNCTION); diff --git a/source/blender/gpu/intern/gpu_codegen.hh b/source/blender/gpu/intern/gpu_codegen.hh index 0ab390aa95b..040e1b5092c 100644 --- a/source/blender/gpu/intern/gpu_codegen.hh +++ b/source/blender/gpu/intern/gpu_codegen.hh @@ -12,6 +12,7 @@ #include "BLI_hash_mm2a.hh" #include "BLI_listbase.h" +#include "BLI_set.hh" #include "BLI_vector.hh" #include "GPU_material.hh" @@ -82,7 +83,6 @@ class GPUCodegen { void generate_uniform_buffer(); void generate_attribs(); void generate_resources(); - void generate_library(); uint32_t hash_get() const { @@ -96,11 +96,13 @@ class GPUCodegen { private: void set_unique_ids(); - void node_serialize(std::stringstream &eval_ss, const GPUNode *node); - std::string graph_serialize(GPUNodeTag tree_tag, - GPUNodeLink *output_link, - const char *output_default = nullptr); - std::string graph_serialize(GPUNodeTag tree_tag); + void node_serialize(blender::Set &used_libraries, + std::stringstream &eval_ss, + const GPUNode *node); + GPUGraphOutput graph_serialize(GPUNodeTag tree_tag, + GPUNodeLink *output_link, + const char *output_default = nullptr); + GPUGraphOutput graph_serialize(GPUNodeTag tree_tag); }; } // namespace 
blender::gpu::shader diff --git a/source/blender/gpu/intern/gpu_material.cc b/source/blender/gpu/intern/gpu_material.cc index 5c3539d6c55..d67647e834c 100644 --- a/source/blender/gpu/intern/gpu_material.cc +++ b/source/blender/gpu/intern/gpu_material.cc @@ -99,11 +99,7 @@ struct GPUMaterial { std::string name; - GPUMaterial(eGPUMaterialEngine engine) : engine(engine) - { - graph.used_libraries = BLI_gset_new( - BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); - }; + GPUMaterial(eGPUMaterialEngine engine) : engine(engine){}; ~GPUMaterial() { diff --git a/source/blender/gpu/intern/gpu_material_library.hh b/source/blender/gpu/intern/gpu_material_library.hh index af92c344cbf..4c4643c2165 100644 --- a/source/blender/gpu/intern/gpu_material_library.hh +++ b/source/blender/gpu/intern/gpu_material_library.hh @@ -9,13 +9,12 @@ #pragma once +#include "BLI_set.hh" #include "GPU_material.hh" #define MAX_FUNCTION_NAME 64 #define MAX_PARAMETER 36 -struct GSet; - enum GPUFunctionQual { FUNCTION_QUAL_IN, FUNCTION_QUAL_OUT, @@ -31,4 +30,6 @@ struct GPUFunction { void *source; /* GPUSource */ }; -GPUFunction *gpu_material_library_use_function(GSet *used_libraries, const char *name); +GPUFunction *gpu_material_library_get_function(const char *name); +void gpu_material_library_use_function(blender::Set &used_libraries, + const char *name); diff --git a/source/blender/gpu/intern/gpu_node_graph.cc b/source/blender/gpu/intern/gpu_node_graph.cc index a5f81c60461..c92c5202163 100644 --- a/source/blender/gpu/intern/gpu_node_graph.cc +++ b/source/blender/gpu/intern/gpu_node_graph.cc @@ -747,7 +747,7 @@ bool GPU_link(GPUMaterial *mat, const char *name, ...) 
va_list params; int i; - function = gpu_material_library_use_function(graph->used_libraries, name); + function = gpu_material_library_get_function(name); if (!function) { fprintf(stderr, "GPU failed to find function %s\n", name); return false; @@ -786,7 +786,7 @@ static bool gpu_stack_link_v(GPUMaterial *material, GPUNodeLink *link, **linkptr; int i, totin, totout; - function = gpu_material_library_use_function(graph->used_libraries, name); + function = gpu_material_library_get_function(name); if (!function) { fprintf(stderr, "GPU failed to find function %s\n", name); return false; @@ -930,11 +930,6 @@ void gpu_node_graph_free(GPUNodeGraph *graph) BLI_freelistN(&graph->attributes); GPU_uniform_attr_list_free(&graph->uniform_attrs); BLI_freelistN(&graph->layer_attrs); - - if (graph->used_libraries) { - BLI_gset_free(graph->used_libraries, nullptr); - graph->used_libraries = nullptr; - } } /* Prune Unused Nodes */ diff --git a/source/blender/gpu/intern/gpu_node_graph.hh b/source/blender/gpu/intern/gpu_node_graph.hh index 4589e3bacd1..bcaf67bbfc4 100644 --- a/source/blender/gpu/intern/gpu_node_graph.hh +++ b/source/blender/gpu/intern/gpu_node_graph.hh @@ -177,9 +177,6 @@ struct GPUNodeGraph { /* The list of layer attributes. */ ListBase layer_attrs; - - /** Set of all the GLSL lib code blocks. */ - GSet *used_libraries; }; /* Node Graph */ diff --git a/source/blender/gpu/intern/gpu_pass.cc b/source/blender/gpu/intern/gpu_pass.cc index 7419293b1e4..6fcb0022cf6 100644 --- a/source/blender/gpu/intern/gpu_pass.cc +++ b/source/blender/gpu/intern/gpu_pass.cc @@ -383,7 +383,6 @@ GPUPass *GPU_generate_pass(GPUMaterial *material, /* The shader is not compiled, continue generating the shader strings. */ codegen.generate_attribs(); codegen.generate_resources(); - codegen.generate_library(); /* Make engine add its own code and implement the generated functions. 
*/ finalize_source_cb(thunk, material, &codegen.output); diff --git a/source/blender/gpu/intern/gpu_shader_dependency.cc b/source/blender/gpu/intern/gpu_shader_dependency.cc index ba79425fcd8..12de7e94825 100644 --- a/source/blender/gpu/intern/gpu_shader_dependency.cc +++ b/source/blender/gpu/intern/gpu_shader_dependency.cc @@ -510,15 +510,21 @@ void gpu_shader_dependency_exit() g_functions = nullptr; } -GPUFunction *gpu_material_library_use_function(GSet *used_libraries, const char *name) +GPUFunction *gpu_material_library_get_function(const char *name) { GPUFunction *function = g_functions->lookup_default(name, nullptr); BLI_assert_msg(function != nullptr, "Requested function not in the function library"); - GPUSource *source = reinterpret_cast(function->source); - BLI_gset_add(used_libraries, const_cast(source->filename.c_str())); return function; } +void gpu_material_library_use_function(blender::Set &used_libraries, + const char *name) +{ + GPUFunction *function = g_functions->lookup_default(name, nullptr); + GPUSource *source = reinterpret_cast(function->source); + used_libraries.add(source->filename.c_str()); +} + namespace blender::gpu::shader { bool gpu_shader_dependency_force_gpu_print_injection()