GPU: Shader Codegen: Split different graph includes
This reduces the number of includes for each tree graph (surface, volume, displacement) and significantly reduces the code size of most vertex shaders, speeding up compile time. Rel #145347 Pull Request: https://projects.blender.org/blender/blender/pulls/146419
This commit is contained in:
committed by
Clément Foucault
parent
1509985013
commit
fb3904ce45
@@ -549,12 +549,13 @@ void ShaderOperation::generate_code(void *thunk,
|
||||
|
||||
eval_code += operation->generate_code_for_inputs(material, shader_create_info);
|
||||
|
||||
eval_code += code_generator_output->composite;
|
||||
eval_code += code_generator_output->composite.serialized;
|
||||
|
||||
eval_code += "}\n";
|
||||
|
||||
shader_create_info.generated_sources.append(
|
||||
{"gpu_shader_compositor_eval.glsl", shader_create_info.dependencies_generated, eval_code});
|
||||
shader_create_info.generated_sources.append({"gpu_shader_compositor_eval.glsl",
|
||||
code_generator_output->composite.dependencies,
|
||||
eval_code});
|
||||
}
|
||||
|
||||
/* Texture storers in the shader always take a [i]vec4 as an argument, so encode each type in an
|
||||
|
||||
@@ -903,17 +903,26 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut
|
||||
|
||||
vert_gen << "float3 nodetree_displacement()\n";
|
||||
vert_gen << "{\n";
|
||||
vert_gen << ((use_vertex_displacement) ? codegen.displacement : "return float3(0);\n");
|
||||
vert_gen << ((use_vertex_displacement) ? codegen.displacement.serialized :
|
||||
"return float3(0);\n");
|
||||
vert_gen << "}\n\n";
|
||||
|
||||
Vector<StringRefNull> dependencies = {"eevee_nodetree_lib.glsl"};
|
||||
dependencies.extend(info.dependencies_generated);
|
||||
Vector<StringRefNull> dependencies = {};
|
||||
if (use_vertex_displacement) {
|
||||
dependencies.append("eevee_nodetree_lib.glsl");
|
||||
dependencies.extend(codegen.displacement.dependencies);
|
||||
}
|
||||
|
||||
info.generated_sources.append({"eevee_nodetree_vert_lib.glsl", dependencies, vert_gen.str()});
|
||||
}
|
||||
|
||||
if (pipeline_type != MAT_PIPE_VOLUME_OCCUPANCY) {
|
||||
frag_gen << (!codegen.material_functions.empty() ? codegen.material_functions : "\n");
|
||||
Vector<StringRefNull> dependencies = {"eevee_nodetree_lib.glsl"};
|
||||
|
||||
for (const auto &graph : codegen.material_functions) {
|
||||
frag_gen << graph.serialized;
|
||||
dependencies.extend(graph.dependencies);
|
||||
}
|
||||
|
||||
if (!codegen.displacement.empty()) {
|
||||
/* Bump displacement. Needed to recompute normals after displacement. */
|
||||
@@ -921,14 +930,16 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut
|
||||
|
||||
frag_gen << "float3 nodetree_displacement()\n";
|
||||
frag_gen << "{\n";
|
||||
frag_gen << codegen.displacement;
|
||||
frag_gen << codegen.displacement.serialized;
|
||||
dependencies.extend(codegen.displacement.dependencies);
|
||||
frag_gen << "}\n\n";
|
||||
}
|
||||
|
||||
frag_gen << "Closure nodetree_surface(float closure_rand)\n";
|
||||
frag_gen << "{\n";
|
||||
frag_gen << " closure_weights_reset(closure_rand);\n";
|
||||
frag_gen << (!codegen.surface.empty() ? codegen.surface : "return Closure(0);\n");
|
||||
frag_gen << codegen.surface.serialized_or_default("return Closure(0);\n");
|
||||
dependencies.extend(codegen.surface.dependencies);
|
||||
frag_gen << "}\n\n";
|
||||
|
||||
/* TODO(fclem): Find a way to pass material parameters inside the material UBO. */
|
||||
@@ -960,19 +971,18 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut
|
||||
}
|
||||
}
|
||||
else {
|
||||
frag_gen << codegen.thickness;
|
||||
frag_gen << codegen.thickness.serialized;
|
||||
dependencies.extend(codegen.thickness.dependencies);
|
||||
}
|
||||
frag_gen << "}\n\n";
|
||||
|
||||
frag_gen << "Closure nodetree_volume()\n";
|
||||
frag_gen << "{\n";
|
||||
frag_gen << " closure_weights_reset(0.0);\n";
|
||||
frag_gen << (!codegen.volume.empty() ? codegen.volume : "return Closure(0);\n");
|
||||
frag_gen << codegen.volume.serialized_or_default("return Closure(0);\n");
|
||||
dependencies.extend(codegen.volume.dependencies);
|
||||
frag_gen << "}\n\n";
|
||||
|
||||
Vector<StringRefNull> dependencies = {"eevee_nodetree_lib.glsl"};
|
||||
dependencies.extend(info.dependencies_generated);
|
||||
|
||||
info.generated_sources.append({"eevee_nodetree_frag_lib.glsl", dependencies, frag_gen.str()});
|
||||
}
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "BLI_set.hh"
|
||||
|
||||
#include "DNA_customdata_types.h" /* for eCustomDataType */
|
||||
#include "DNA_image_types.h"
|
||||
#include "DNA_listBase.h"
|
||||
@@ -295,15 +297,30 @@ struct GPUNodeStack {
|
||||
bool end;
|
||||
};
|
||||
|
||||
struct GPUGraphOutput {
|
||||
std::string serialized;
|
||||
blender::Vector<blender::StringRefNull> dependencies;
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return serialized.empty();
|
||||
}
|
||||
|
||||
std::string serialized_or_default(std::string value) const
|
||||
{
|
||||
return serialized.empty() ? value : serialized;
|
||||
}
|
||||
};
|
||||
|
||||
struct GPUCodegenOutput {
|
||||
std::string attr_load;
|
||||
/* Node-tree functions calls. */
|
||||
std::string displacement;
|
||||
std::string surface;
|
||||
std::string volume;
|
||||
std::string thickness;
|
||||
std::string composite;
|
||||
std::string material_functions;
|
||||
GPUGraphOutput displacement;
|
||||
GPUGraphOutput surface;
|
||||
GPUGraphOutput volume;
|
||||
GPUGraphOutput thickness;
|
||||
GPUGraphOutput composite;
|
||||
blender::Vector<GPUGraphOutput> material_functions;
|
||||
|
||||
GPUShaderCreateInfo *create_info;
|
||||
};
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "GPU_vertex_format.hh"
|
||||
|
||||
#include "gpu_codegen.hh"
|
||||
#include "gpu_material_library.hh"
|
||||
#include "gpu_shader_dependency_private.hh"
|
||||
|
||||
#include <cstdarg>
|
||||
@@ -289,34 +290,12 @@ void GPUCodegen::generate_resources()
|
||||
info.typedef_source_generated = ss.str();
|
||||
}
|
||||
|
||||
void GPUCodegen::generate_library()
|
||||
void GPUCodegen::node_serialize(Set<StringRefNull> &used_libraries,
|
||||
std::stringstream &eval_ss,
|
||||
const GPUNode *node)
|
||||
{
|
||||
GPUCodegenCreateInfo &info = *create_info;
|
||||
gpu_material_library_use_function(used_libraries, node->name);
|
||||
|
||||
void *value;
|
||||
Vector<StringRefNull> source_files;
|
||||
|
||||
/* Iterate over libraries. We need to keep this struct intact in case it is required for the
|
||||
* optimization pass. The first pass just collects the keys from the GSET, given items in a GSET
|
||||
* are unordered this can cause order differences between invocations, so we collect the keys
|
||||
* first, and sort them before doing actual work, to guarantee stable behavior while still
|
||||
* having cheap insertions into the GSET */
|
||||
GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
|
||||
while (!BLI_ghashIterator_done(ihash)) {
|
||||
value = BLI_ghashIterator_getKey(ihash);
|
||||
source_files.append((const char *)value);
|
||||
BLI_ghashIterator_step(ihash);
|
||||
}
|
||||
BLI_ghashIterator_free(ihash);
|
||||
|
||||
std::sort(source_files.begin(), source_files.end());
|
||||
for (auto &key : source_files) {
|
||||
info.dependencies_generated.append_non_duplicates(key);
|
||||
}
|
||||
}
|
||||
|
||||
void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
|
||||
{
|
||||
/* Declare constants. */
|
||||
LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
|
||||
switch (input->source) {
|
||||
@@ -392,14 +371,26 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
|
||||
nodes_total_++;
|
||||
}
|
||||
|
||||
std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
|
||||
GPUNodeLink *output_link,
|
||||
const char *output_default)
|
||||
/**
 * Copy every element of `set` into a vector and sort the result, so that the
 * returned order does not depend on the iteration order of the set.
 */
static Vector<StringRefNull> set_to_vector_stable(Set<StringRefNull> &set)
{
  Vector<StringRefNull> sorted_names;
  for (const StringRefNull &name : set) {
    sorted_names.append(name);
  }

  /* Sort dependencies to avoid random order causing shader caching to fail (see #108289). */
  std::sort(sorted_names.begin(), sorted_names.end());

  return sorted_names;
}
|
||||
|
||||
GPUGraphOutput GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
|
||||
GPUNodeLink *output_link,
|
||||
const char *output_default)
|
||||
{
|
||||
if (output_link == nullptr && output_default == nullptr) {
|
||||
return "";
|
||||
return {};
|
||||
}
|
||||
|
||||
Set<StringRefNull> used_libraries;
|
||||
std::stringstream eval_ss;
|
||||
bool has_nodes = false;
|
||||
/* NOTE: The node order is already top to bottom (or left to right in node editor)
|
||||
@@ -408,12 +399,12 @@ std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
|
||||
if ((node->tag & tree_tag) == 0) {
|
||||
continue;
|
||||
}
|
||||
node_serialize(eval_ss, node);
|
||||
node_serialize(used_libraries, eval_ss, node);
|
||||
has_nodes = true;
|
||||
}
|
||||
|
||||
if (!has_nodes) {
|
||||
return "";
|
||||
return {};
|
||||
}
|
||||
|
||||
if (output_link) {
|
||||
@@ -426,20 +417,21 @@ std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
|
||||
|
||||
std::string str = eval_ss.str();
|
||||
BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(str.c_str()), str.size());
|
||||
return str;
|
||||
return {str, set_to_vector_stable(used_libraries)};
|
||||
}
|
||||
|
||||
std::string GPUCodegen::graph_serialize(GPUNodeTag tree_tag)
|
||||
GPUGraphOutput GPUCodegen::graph_serialize(GPUNodeTag tree_tag)
|
||||
{
|
||||
std::stringstream eval_ss;
|
||||
Set<StringRefNull> used_libraries;
|
||||
LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
|
||||
if (node->tag & tree_tag) {
|
||||
node_serialize(eval_ss, node);
|
||||
node_serialize(used_libraries, eval_ss, node);
|
||||
}
|
||||
}
|
||||
std::string str = eval_ss.str();
|
||||
BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(str.c_str()), str.size());
|
||||
return str;
|
||||
return {str, set_to_vector_stable(used_libraries)};
|
||||
}
|
||||
|
||||
void GPUCodegen::generate_cryptomatte()
|
||||
@@ -507,19 +499,18 @@ void GPUCodegen::generate_graphs()
|
||||
}
|
||||
|
||||
if (!BLI_listbase_is_empty(&graph.material_functions)) {
|
||||
std::stringstream eval_ss;
|
||||
eval_ss << "\n/* Generated Functions */\n\n";
|
||||
LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, func_link, &graph.material_functions) {
|
||||
std::stringstream eval_ss;
|
||||
/* Untag every node in the graph to avoid serializing nodes from other functions */
|
||||
LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
|
||||
node->tag &= ~GPU_NODE_TAG_FUNCTION;
|
||||
}
|
||||
/* Tag only the nodes needed for the current function */
|
||||
gpu_nodes_tag(func_link->outlink, GPU_NODE_TAG_FUNCTION);
|
||||
const std::string fn = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink);
|
||||
eval_ss << "float " << func_link->name << "() {\n" << fn << "}\n\n";
|
||||
GPUGraphOutput graph = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink);
|
||||
eval_ss << "float " << func_link->name << "() {\n" << graph.serialized << "}\n\n";
|
||||
output.material_functions.append({eval_ss.str(), graph.dependencies});
|
||||
}
|
||||
output.material_functions = eval_ss.str();
|
||||
/* Leave the function tags as they were before serialization */
|
||||
LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, funclink, &graph.material_functions) {
|
||||
gpu_nodes_tag(funclink->outlink, GPU_NODE_TAG_FUNCTION);
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
|
||||
#include "BLI_hash_mm2a.hh"
|
||||
#include "BLI_listbase.h"
|
||||
#include "BLI_set.hh"
|
||||
#include "BLI_vector.hh"
|
||||
|
||||
#include "GPU_material.hh"
|
||||
@@ -82,7 +83,6 @@ class GPUCodegen {
|
||||
void generate_uniform_buffer();
|
||||
void generate_attribs();
|
||||
void generate_resources();
|
||||
void generate_library();
|
||||
|
||||
uint32_t hash_get() const
|
||||
{
|
||||
@@ -96,11 +96,13 @@ class GPUCodegen {
|
||||
private:
|
||||
void set_unique_ids();
|
||||
|
||||
void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
|
||||
std::string graph_serialize(GPUNodeTag tree_tag,
|
||||
GPUNodeLink *output_link,
|
||||
const char *output_default = nullptr);
|
||||
std::string graph_serialize(GPUNodeTag tree_tag);
|
||||
void node_serialize(blender::Set<blender::StringRefNull> &used_libraries,
|
||||
std::stringstream &eval_ss,
|
||||
const GPUNode *node);
|
||||
GPUGraphOutput graph_serialize(GPUNodeTag tree_tag,
|
||||
GPUNodeLink *output_link,
|
||||
const char *output_default = nullptr);
|
||||
GPUGraphOutput graph_serialize(GPUNodeTag tree_tag);
|
||||
};
|
||||
|
||||
} // namespace blender::gpu::shader
|
||||
|
||||
@@ -99,11 +99,7 @@ struct GPUMaterial {
|
||||
|
||||
std::string name;
|
||||
|
||||
GPUMaterial(eGPUMaterialEngine engine) : engine(engine)
|
||||
{
|
||||
graph.used_libraries = BLI_gset_new(
|
||||
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
|
||||
};
|
||||
GPUMaterial(eGPUMaterialEngine engine) : engine(engine){};
|
||||
|
||||
~GPUMaterial()
|
||||
{
|
||||
|
||||
@@ -9,13 +9,12 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_set.hh"
|
||||
#include "GPU_material.hh"
|
||||
|
||||
#define MAX_FUNCTION_NAME 64
|
||||
#define MAX_PARAMETER 36
|
||||
|
||||
struct GSet;
|
||||
|
||||
enum GPUFunctionQual {
|
||||
FUNCTION_QUAL_IN,
|
||||
FUNCTION_QUAL_OUT,
|
||||
@@ -31,4 +30,6 @@ struct GPUFunction {
|
||||
void *source; /* GPUSource */
|
||||
};
|
||||
|
||||
GPUFunction *gpu_material_library_use_function(GSet *used_libraries, const char *name);
|
||||
GPUFunction *gpu_material_library_get_function(const char *name);
|
||||
void gpu_material_library_use_function(blender::Set<blender::StringRefNull> &used_libraries,
|
||||
const char *name);
|
||||
|
||||
@@ -747,7 +747,7 @@ bool GPU_link(GPUMaterial *mat, const char *name, ...)
|
||||
va_list params;
|
||||
int i;
|
||||
|
||||
function = gpu_material_library_use_function(graph->used_libraries, name);
|
||||
function = gpu_material_library_get_function(name);
|
||||
if (!function) {
|
||||
fprintf(stderr, "GPU failed to find function %s\n", name);
|
||||
return false;
|
||||
@@ -786,7 +786,7 @@ static bool gpu_stack_link_v(GPUMaterial *material,
|
||||
GPUNodeLink *link, **linkptr;
|
||||
int i, totin, totout;
|
||||
|
||||
function = gpu_material_library_use_function(graph->used_libraries, name);
|
||||
function = gpu_material_library_get_function(name);
|
||||
if (!function) {
|
||||
fprintf(stderr, "GPU failed to find function %s\n", name);
|
||||
return false;
|
||||
@@ -930,11 +930,6 @@ void gpu_node_graph_free(GPUNodeGraph *graph)
|
||||
BLI_freelistN(&graph->attributes);
|
||||
GPU_uniform_attr_list_free(&graph->uniform_attrs);
|
||||
BLI_freelistN(&graph->layer_attrs);
|
||||
|
||||
if (graph->used_libraries) {
|
||||
BLI_gset_free(graph->used_libraries, nullptr);
|
||||
graph->used_libraries = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
/* Prune Unused Nodes */
|
||||
|
||||
@@ -177,9 +177,6 @@ struct GPUNodeGraph {
|
||||
|
||||
/* The list of layer attributes. */
|
||||
ListBase layer_attrs;
|
||||
|
||||
/** Set of all the GLSL lib code blocks. */
|
||||
GSet *used_libraries;
|
||||
};
|
||||
|
||||
/* Node Graph */
|
||||
|
||||
@@ -383,7 +383,6 @@ GPUPass *GPU_generate_pass(GPUMaterial *material,
|
||||
/* The shader is not compiled, continue generating the shader strings. */
|
||||
codegen.generate_attribs();
|
||||
codegen.generate_resources();
|
||||
codegen.generate_library();
|
||||
|
||||
/* Make engine add its own code and implement the generated functions. */
|
||||
finalize_source_cb(thunk, material, &codegen.output);
|
||||
|
||||
@@ -510,15 +510,21 @@ void gpu_shader_dependency_exit()
|
||||
g_functions = nullptr;
|
||||
}
|
||||
|
||||
GPUFunction *gpu_material_library_use_function(GSet *used_libraries, const char *name)
|
||||
GPUFunction *gpu_material_library_get_function(const char *name)
|
||||
{
|
||||
GPUFunction *function = g_functions->lookup_default(name, nullptr);
|
||||
BLI_assert_msg(function != nullptr, "Requested function not in the function library");
|
||||
GPUSource *source = reinterpret_cast<GPUSource *>(function->source);
|
||||
BLI_gset_add(used_libraries, const_cast<char *>(source->filename.c_str()));
|
||||
return function;
|
||||
}
|
||||
|
||||
/**
 * Register the library file that defines the function `name` as being used.
 *
 * \param used_libraries: Set that receives the filename of the source containing the function.
 * \param name: Name of the function to look up in the function library.
 *
 * Nothing is added to the set when the function is unknown.
 */
void gpu_material_library_use_function(blender::Set<blender::StringRefNull> &used_libraries,
                                       const char *name)
{
  GPUFunction *function = g_functions->lookup_default(name, nullptr);
  /* Same contract as #gpu_material_library_get_function: a missing function is a programming
   * error, but it must not turn into a null dereference when asserts are compiled out. */
  BLI_assert_msg(function != nullptr, "Requested function not in the function library");
  if (function == nullptr) {
    return;
  }

  GPUSource *source = reinterpret_cast<GPUSource *>(function->source);
  used_libraries.add(source->filename.c_str());
}
|
||||
|
||||
namespace blender::gpu::shader {
|
||||
|
||||
bool gpu_shader_dependency_force_gpu_print_injection()
|
||||
|
||||
Reference in New Issue
Block a user