GPU: Shader Codegen: Split different graph includes

This reduces the number of includes needed by each
tree graph (surface, volume, displacement) and
significantly reduces the code size of most vertex
shaders, which speeds up compile time.

Rel #145347

Pull Request: https://projects.blender.org/blender/blender/pulls/146419
This commit is contained in:
Clément Foucault
2025-09-22 10:24:10 +02:00
committed by Clément Foucault
parent 1509985013
commit fb3904ce45
11 changed files with 104 additions and 89 deletions

View File

@@ -549,12 +549,13 @@ void ShaderOperation::generate_code(void *thunk,
eval_code += operation->generate_code_for_inputs(material, shader_create_info);
eval_code += code_generator_output->composite;
eval_code += code_generator_output->composite.serialized;
eval_code += "}\n";
shader_create_info.generated_sources.append(
{"gpu_shader_compositor_eval.glsl", shader_create_info.dependencies_generated, eval_code});
shader_create_info.generated_sources.append({"gpu_shader_compositor_eval.glsl",
code_generator_output->composite.dependencies,
eval_code});
}
/* Texture storers in the shader always take a [i]vec4 as an argument, so encode each type in an

View File

@@ -903,17 +903,26 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut
vert_gen << "float3 nodetree_displacement()\n";
vert_gen << "{\n";
vert_gen << ((use_vertex_displacement) ? codegen.displacement : "return float3(0);\n");
vert_gen << ((use_vertex_displacement) ? codegen.displacement.serialized :
"return float3(0);\n");
vert_gen << "}\n\n";
Vector<StringRefNull> dependencies = {"eevee_nodetree_lib.glsl"};
dependencies.extend(info.dependencies_generated);
Vector<StringRefNull> dependencies = {};
if (use_vertex_displacement) {
dependencies.append("eevee_nodetree_lib.glsl");
dependencies.extend(codegen.displacement.dependencies);
}
info.generated_sources.append({"eevee_nodetree_vert_lib.glsl", dependencies, vert_gen.str()});
}
if (pipeline_type != MAT_PIPE_VOLUME_OCCUPANCY) {
frag_gen << (!codegen.material_functions.empty() ? codegen.material_functions : "\n");
Vector<StringRefNull> dependencies = {"eevee_nodetree_lib.glsl"};
for (const auto &graph : codegen.material_functions) {
frag_gen << graph.serialized;
dependencies.extend(graph.dependencies);
}
if (!codegen.displacement.empty()) {
/* Bump displacement. Needed to recompute normals after displacement. */
@@ -921,14 +930,16 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut
frag_gen << "float3 nodetree_displacement()\n";
frag_gen << "{\n";
frag_gen << codegen.displacement;
frag_gen << codegen.displacement.serialized;
dependencies.extend(codegen.displacement.dependencies);
frag_gen << "}\n\n";
}
frag_gen << "Closure nodetree_surface(float closure_rand)\n";
frag_gen << "{\n";
frag_gen << " closure_weights_reset(closure_rand);\n";
frag_gen << (!codegen.surface.empty() ? codegen.surface : "return Closure(0);\n");
frag_gen << codegen.surface.serialized_or_default("return Closure(0);\n");
dependencies.extend(codegen.surface.dependencies);
frag_gen << "}\n\n";
/* TODO(fclem): Find a way to pass material parameters inside the material UBO. */
@@ -960,19 +971,18 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut
}
}
else {
frag_gen << codegen.thickness;
frag_gen << codegen.thickness.serialized;
dependencies.extend(codegen.thickness.dependencies);
}
frag_gen << "}\n\n";
frag_gen << "Closure nodetree_volume()\n";
frag_gen << "{\n";
frag_gen << " closure_weights_reset(0.0);\n";
frag_gen << (!codegen.volume.empty() ? codegen.volume : "return Closure(0);\n");
frag_gen << codegen.volume.serialized_or_default("return Closure(0);\n");
dependencies.extend(codegen.volume.dependencies);
frag_gen << "}\n\n";
Vector<StringRefNull> dependencies = {"eevee_nodetree_lib.glsl"};
dependencies.extend(info.dependencies_generated);
info.generated_sources.append({"eevee_nodetree_frag_lib.glsl", dependencies, frag_gen.str()});
}

View File

@@ -10,6 +10,8 @@
#include <string>
#include "BLI_set.hh"
#include "DNA_customdata_types.h" /* for eCustomDataType */
#include "DNA_image_types.h"
#include "DNA_listBase.h"
@@ -295,15 +297,30 @@ struct GPUNodeStack {
bool end;
};
/**
 * Result of serializing one node graph: the generated code together with the list of
 * source files that code depends on.
 */
struct GPUGraphOutput {
  /** Generated code of the graph. Left empty when nothing was serialized. */
  std::string serialized;
  /** Source files required by `serialized`. */
  blender::Vector<blender::StringRefNull> dependencies;

  /** True when no code was generated for this graph. */
  bool empty() const
  {
    return serialized.empty();
  }

  /** Return the generated code, or `value` when no code was generated. */
  std::string serialized_or_default(std::string value) const
  {
    if (this->empty()) {
      return value;
    }
    return serialized;
  }
};
struct GPUCodegenOutput {
std::string attr_load;
/* Node-tree functions calls. */
std::string displacement;
std::string surface;
std::string volume;
std::string thickness;
std::string composite;
std::string material_functions;
GPUGraphOutput displacement;
GPUGraphOutput surface;
GPUGraphOutput volume;
GPUGraphOutput thickness;
GPUGraphOutput composite;
blender::Vector<GPUGraphOutput> material_functions;
GPUShaderCreateInfo *create_info;
};

View File

@@ -26,6 +26,7 @@
#include "GPU_vertex_format.hh"
#include "gpu_codegen.hh"
#include "gpu_material_library.hh"
#include "gpu_shader_dependency_private.hh"
#include <cstdarg>
@@ -289,34 +290,12 @@ void GPUCodegen::generate_resources()
info.typedef_source_generated = ss.str();
}
void GPUCodegen::generate_library()
void GPUCodegen::node_serialize(Set<StringRefNull> &used_libraries,
std::stringstream &eval_ss,
const GPUNode *node)
{
GPUCodegenCreateInfo &info = *create_info;
gpu_material_library_use_function(used_libraries, node->name);
void *value;
Vector<StringRefNull> source_files;
/* Iterate over libraries. We need to keep this struct intact in case it is required for the
* optimization pass. The first pass just collects the keys from the GSET, given items in a GSET
* are unordered this can cause order differences between invocations, so we collect the keys
* first, and sort them before doing actual work, to guarantee stable behavior while still
* having cheap insertions into the GSET */
GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
while (!BLI_ghashIterator_done(ihash)) {
value = BLI_ghashIterator_getKey(ihash);
source_files.append((const char *)value);
BLI_ghashIterator_step(ihash);
}
BLI_ghashIterator_free(ihash);
std::sort(source_files.begin(), source_files.end());
for (auto &key : source_files) {
info.dependencies_generated.append_non_duplicates(key);
}
}
void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
{
/* Declare constants. */
LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
switch (input->source) {
@@ -392,14 +371,26 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
nodes_total_++;
}
std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
GPUNodeLink *output_link,
const char *output_default)
/**
 * Copy the content of `set` into a vector sorted in ascending order.
 *
 * The result is sorted to avoid random order causing shader caching to fail (see #108289).
 *
 * \param set: Collection of source file names. It is only read, never modified.
 * \return The same names, sorted.
 */
static Vector<StringRefNull> set_to_vector_stable(const Set<StringRefNull> &set)
{
  Vector<StringRefNull> source_files;
  /* The final size is known up front: allocate once instead of growing while appending. */
  source_files.reserve(set.size());
  for (const StringRefNull &str : set) {
    source_files.append(str);
  }
  std::sort(source_files.begin(), source_files.end());
  return source_files;
}
GPUGraphOutput GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
GPUNodeLink *output_link,
const char *output_default)
{
if (output_link == nullptr && output_default == nullptr) {
return "";
return {};
}
Set<StringRefNull> used_libraries;
std::stringstream eval_ss;
bool has_nodes = false;
/* NOTE: The node order is already top to bottom (or left to right in node editor)
@@ -408,12 +399,12 @@ std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
if ((node->tag & tree_tag) == 0) {
continue;
}
node_serialize(eval_ss, node);
node_serialize(used_libraries, eval_ss, node);
has_nodes = true;
}
if (!has_nodes) {
return "";
return {};
}
if (output_link) {
@@ -426,20 +417,21 @@ std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
std::string str = eval_ss.str();
BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(str.c_str()), str.size());
return str;
return {str, set_to_vector_stable(used_libraries)};
}
std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag)
GPUGraphOutput GPUCodegen::graph_serialize(GPUNodeTag tree_tag)
{
std::stringstream eval_ss;
Set<StringRefNull> used_libraries;
LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
if (node->tag & tree_tag) {
node_serialize(eval_ss, node);
node_serialize(used_libraries, eval_ss, node);
}
}
std::string str = eval_ss.str();
BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(str.c_str()), str.size());
return str;
return {str, set_to_vector_stable(used_libraries)};
}
void GPUCodegen::generate_cryptomatte()
@@ -507,19 +499,18 @@ void GPUCodegen::generate_graphs()
}
if (!BLI_listbase_is_empty(&graph.material_functions)) {
std::stringstream eval_ss;
eval_ss << "\n/* Generated Functions */\n\n";
LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, func_link, &graph.material_functions) {
std::stringstream eval_ss;
/* Untag every node in the graph to avoid serializing nodes from other functions */
LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
node->tag &= ~GPU_NODE_TAG_FUNCTION;
}
/* Tag only the nodes needed for the current function */
gpu_nodes_tag(func_link->outlink, GPU_NODE_TAG_FUNCTION);
const std::string fn = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink);
eval_ss << "float " << func_link->name << "() {\n" << fn << "}\n\n";
GPUGraphOutput graph = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink);
eval_ss << "float " << func_link->name << "() {\n" << graph.serialized << "}\n\n";
output.material_functions.append({eval_ss.str(), graph.dependencies});
}
output.material_functions = eval_ss.str();
/* Leave the function tags as they were before serialization */
LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, funclink, &graph.material_functions) {
gpu_nodes_tag(funclink->outlink, GPU_NODE_TAG_FUNCTION);

View File

@@ -12,6 +12,7 @@
#include "BLI_hash_mm2a.hh"
#include "BLI_listbase.h"
#include "BLI_set.hh"
#include "BLI_vector.hh"
#include "GPU_material.hh"
@@ -82,7 +83,6 @@ class GPUCodegen {
void generate_uniform_buffer();
void generate_attribs();
void generate_resources();
void generate_library();
uint32_t hash_get() const
{
@@ -96,11 +96,13 @@ class GPUCodegen {
private:
void set_unique_ids();
void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
std::string graph_serialize(GPUNodeTag tree_tag,
GPUNodeLink *output_link,
const char *output_default = nullptr);
std::string graph_serialize(GPUNodeTag tree_tag);
void node_serialize(blender::Set<blender::StringRefNull> &used_libraries,
std::stringstream &eval_ss,
const GPUNode *node);
GPUGraphOutput graph_serialize(GPUNodeTag tree_tag,
GPUNodeLink *output_link,
const char *output_default = nullptr);
GPUGraphOutput graph_serialize(GPUNodeTag tree_tag);
};
} // namespace blender::gpu::shader

View File

@@ -99,11 +99,7 @@ struct GPUMaterial {
std::string name;
GPUMaterial(eGPUMaterialEngine engine) : engine(engine)
{
graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
};
GPUMaterial(eGPUMaterialEngine engine) : engine(engine){};
~GPUMaterial()
{

View File

@@ -9,13 +9,12 @@
#pragma once
#include "BLI_set.hh"
#include "GPU_material.hh"
#define MAX_FUNCTION_NAME 64
#define MAX_PARAMETER 36
struct GSet;
enum GPUFunctionQual {
FUNCTION_QUAL_IN,
FUNCTION_QUAL_OUT,
@@ -31,4 +30,6 @@ struct GPUFunction {
void *source; /* GPUSource */
};
GPUFunction *gpu_material_library_use_function(GSet *used_libraries, const char *name);
GPUFunction *gpu_material_library_get_function(const char *name);
void gpu_material_library_use_function(blender::Set<blender::StringRefNull> &used_libraries,
const char *name);

View File

@@ -747,7 +747,7 @@ bool GPU_link(GPUMaterial *mat, const char *name, ...)
va_list params;
int i;
function = gpu_material_library_use_function(graph->used_libraries, name);
function = gpu_material_library_get_function(name);
if (!function) {
fprintf(stderr, "GPU failed to find function %s\n", name);
return false;
@@ -786,7 +786,7 @@ static bool gpu_stack_link_v(GPUMaterial *material,
GPUNodeLink *link, **linkptr;
int i, totin, totout;
function = gpu_material_library_use_function(graph->used_libraries, name);
function = gpu_material_library_get_function(name);
if (!function) {
fprintf(stderr, "GPU failed to find function %s\n", name);
return false;
@@ -930,11 +930,6 @@ void gpu_node_graph_free(GPUNodeGraph *graph)
BLI_freelistN(&graph->attributes);
GPU_uniform_attr_list_free(&graph->uniform_attrs);
BLI_freelistN(&graph->layer_attrs);
if (graph->used_libraries) {
BLI_gset_free(graph->used_libraries, nullptr);
graph->used_libraries = nullptr;
}
}
/* Prune Unused Nodes */

View File

@@ -177,9 +177,6 @@ struct GPUNodeGraph {
/* The list of layer attributes. */
ListBase layer_attrs;
/** Set of all the GLSL lib code blocks. */
GSet *used_libraries;
};
/* Node Graph */

View File

@@ -383,7 +383,6 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
/* The shader is not compiled, continue generating the shader strings. */
codegen.generate_attribs();
codegen.generate_resources();
codegen.generate_library();
/* Make engine add its own code and implement the generated functions. */
finalize_source_cb(thunk, material, &codegen.output);

View File

@@ -510,15 +510,21 @@ void gpu_shader_dependency_exit()
g_functions = nullptr;
}
GPUFunction *gpu_material_library_use_function(GSet *used_libraries, const char *name)
GPUFunction *gpu_material_library_get_function(const char *name)
{
GPUFunction *function = g_functions->lookup_default(name, nullptr);
BLI_assert_msg(function != nullptr, "Requested function not in the function library");
GPUSource *source = reinterpret_cast<GPUSource *>(function->source);
BLI_gset_add(used_libraries, const_cast<char *>(source->filename.c_str()));
return function;
}
/**
 * Register the source file that defines the library function `name` into `used_libraries`.
 *
 * \param used_libraries: Set receiving the file name of the source containing the function.
 * \param name: Name of the function to look up in the function library.
 */
void gpu_material_library_use_function(blender::Set<blender::StringRefNull> &used_libraries,
                                       const char *name)
{
  GPUFunction *function = g_functions->lookup_default(name, nullptr);
  BLI_assert_msg(function != nullptr, "Requested function not in the function library");
  if (function == nullptr) {
    /* Unknown function: there is nothing to register. This also avoids dereferencing a null
     * pointer when the assert above is compiled out. */
    return;
  }
  /* `GPUFunction::source` is stored as an opaque pointer to a #GPUSource. */
  const GPUSource *source = static_cast<const GPUSource *>(function->source);
  used_libraries.add(source->filename.c_str());
}
namespace blender::gpu::shader {
bool gpu_shader_dependency_force_gpu_print_injection()