2023-08-16 00:20:26 +10:00
|
|
|
/* SPDX-FileCopyrightText: 2005 Blender Authors
|
2023-05-31 16:19:06 +02:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
2022-04-14 18:47:58 +02:00
|
|
|
|
|
|
|
|
/** \file
|
|
|
|
|
* \ingroup gpu
|
|
|
|
|
*
|
|
|
|
|
* Convert material node-trees to GLSL.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include "MEM_guardedalloc.h"
|
|
|
|
|
|
2022-09-13 11:07:30 +02:00
|
|
|
#include "DNA_material_types.h"
|
2022-04-14 18:47:58 +02:00
|
|
|
|
2025-01-22 11:19:02 +01:00
|
|
|
#include "BLI_span.hh"
|
2023-09-01 21:37:11 +02:00
|
|
|
#include "BLI_string.h"
|
2025-05-22 17:53:22 +02:00
|
|
|
#include "BLI_vector.hh"
|
2022-04-14 18:47:58 +02:00
|
|
|
|
2022-09-13 11:07:30 +02:00
|
|
|
#include "BKE_cryptomatte.hh"
|
2022-04-14 18:47:58 +02:00
|
|
|
|
2025-01-22 11:19:02 +01:00
|
|
|
#include "IMB_colormanagement.hh"
|
|
|
|
|
|
2024-03-23 01:24:18 +01:00
|
|
|
#include "GPU_capabilities.hh"
|
|
|
|
|
#include "GPU_shader.hh"
|
2024-02-11 16:10:00 -05:00
|
|
|
#include "GPU_uniform_buffer.hh"
|
2024-03-23 01:24:18 +01:00
|
|
|
#include "GPU_vertex_format.hh"
|
2022-04-14 18:47:58 +02:00
|
|
|
|
2024-03-23 01:24:18 +01:00
|
|
|
#include "gpu_codegen.hh"
|
2025-09-22 10:24:10 +02:00
|
|
|
#include "gpu_material_library.hh"
|
2024-03-23 01:24:18 +01:00
|
|
|
#include "gpu_shader_dependency_private.hh"
|
2022-04-14 18:47:58 +02:00
|
|
|
|
2022-06-10 10:29:35 +02:00
|
|
|
#include <cstdarg>
|
|
|
|
|
#include <cstring>
|
2022-04-14 18:47:58 +02:00
|
|
|
|
2025-05-22 17:53:22 +02:00
|
|
|
using namespace blender;
|
2022-04-14 18:47:58 +02:00
|
|
|
using namespace blender::gpu::shader;
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name Type > string conversion
|
|
|
|
|
* \{ */
|
|
|
|
|
|
|
|
|
|
static std::ostream &operator<<(std::ostream &stream, const GPUInput *input)
|
|
|
|
|
{
|
|
|
|
|
switch (input->source) {
|
|
|
|
|
case GPU_SOURCE_FUNCTION_CALL:
|
|
|
|
|
case GPU_SOURCE_OUTPUT:
|
2025-10-08 16:38:14 +02:00
|
|
|
return stream << (input->is_zone_io ? "zone" : "tmp") << input->id;
|
2022-04-14 18:47:58 +02:00
|
|
|
case GPU_SOURCE_CONSTANT:
|
2025-10-08 16:38:14 +02:00
|
|
|
return stream << (input->is_zone_io ? "zone" : "cons") << input->id;
|
2022-04-14 18:47:58 +02:00
|
|
|
case GPU_SOURCE_UNIFORM:
|
2025-10-08 16:38:14 +02:00
|
|
|
return stream << "node_tree.u" << input->id << (input->is_duplicate ? "b" : "");
|
2022-04-14 18:47:58 +02:00
|
|
|
case GPU_SOURCE_ATTR:
|
|
|
|
|
return stream << "var_attrs.v" << input->attr->id;
|
|
|
|
|
case GPU_SOURCE_UNIFORM_ATTR:
|
2023-07-17 16:44:20 +02:00
|
|
|
return stream << "UNI_ATTR(unf_attrs[resource_id].attr" << input->uniform_attr->id << ")";
|
Attribute Node: support accessing attributes of View Layer and Scene.
The attribute node already allows accessing attributes associated
with objects and meshes, which allows changing the behavior of the
same material between different objects or instances. The same idea
can be extended to an even more global level of layers and scenes.
Currently view layers provide an option to replace all materials
with a different one. However, since the same material will be applied
to all objects in the layer, varying the behavior between layers while
preserving distinct materials requires duplicating objects.
Providing access to properties of layers and scenes via the attribute
node enables making materials with built-in switches or settings that
can be controlled globally at the view layer level. This is probably
most useful for complex NPR shading and compositing. Like with objects,
the node can also access built-in scene properties, like render resolution
or FOV of the active camera. Lookup is also attempted in World, similar
to how the Object mode checks the Mesh datablock.
In Cycles this mode is implemented by replacing the attribute node with
the attribute value during sync, allowing constant folding to take the
values into account. This means however that materials that use this
feature have to be re-synced upon any changes to scene, world or camera.
The Eevee version uses a new uniform buffer containing a sorted array
mapping name hashes to values, with binary search lookup. The array
is limited to 512 entries, which is effectively limitless even
considering it is shared by all materials in the scene; it is also
just 16KB of memory so no point trying to optimize further.
The buffer has to be rebuilt when new attributes are detected in a
material, so the draw engine keeps a table of recently seen attribute
names to minimize the chance of extra rebuilds mid-draw.
Differential Revision: https://developer.blender.org/D15941
2022-09-12 00:30:58 +03:00
|
|
|
case GPU_SOURCE_LAYER_ATTR:
|
|
|
|
|
return stream << "attr_load_layer(" << input->layer_attr->hash_code << ")";
|
2022-04-14 18:47:58 +02:00
|
|
|
case GPU_SOURCE_STRUCT:
|
|
|
|
|
return stream << "strct" << input->id;
|
|
|
|
|
case GPU_SOURCE_TEX:
|
|
|
|
|
return stream << input->texture->sampler_name;
|
|
|
|
|
case GPU_SOURCE_TEX_TILED_MAPPING:
|
|
|
|
|
return stream << input->texture->tiled_mapping_name;
|
|
|
|
|
default:
|
|
|
|
|
BLI_assert(0);
|
|
|
|
|
return stream;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
|
|
|
|
|
{
|
2025-10-08 16:38:14 +02:00
|
|
|
return stream << (output->is_zone_io ? "zone" : "tmp") << output->id;
|
2022-04-14 18:47:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Print data constructor (i.e: vec2(1.0f, 1.0f)). */
|
2025-05-22 17:53:22 +02:00
|
|
|
static std::ostream &operator<<(std::ostream &stream, const Span<float> &span)
|
2022-04-14 18:47:58 +02:00
|
|
|
{
|
2025-09-15 15:11:02 +02:00
|
|
|
stream << (GPUType)span.size() << "(";
|
2025-01-22 11:19:02 +01:00
|
|
|
/* Use uint representation to allow exact same bit pattern even if NaN. This is
|
|
|
|
|
* because we can pass UINTs as floats for constants. */
|
2025-05-22 17:53:22 +02:00
|
|
|
const Span<uint32_t> uint_span = span.cast<uint32_t>();
|
2025-01-22 11:19:02 +01:00
|
|
|
for (const uint32_t &element : uint_span) {
|
2023-07-31 10:15:30 +10:00
|
|
|
char formatted_float[32];
|
2025-01-22 11:19:02 +01:00
|
|
|
SNPRINTF(formatted_float, "uintBitsToFloat(%uu)", element);
|
2023-07-31 10:15:30 +10:00
|
|
|
stream << formatted_float;
|
2025-01-22 11:19:02 +01:00
|
|
|
if (&element != &uint_span.last()) {
|
2022-04-14 18:47:58 +02:00
|
|
|
stream << ", ";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
stream << ")";
|
|
|
|
|
return stream;
|
|
|
|
|
}
|
|
|
|
|
|
2025-01-22 11:19:02 +01:00
|
|
|
/* Trick type to change overload and keep a somewhat nice syntax. */
|
|
|
|
|
struct GPUConstant : public GPUInput {};
|
|
|
|
|
|
|
|
|
|
static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)
|
|
|
|
|
{
|
2025-05-22 17:53:22 +02:00
|
|
|
stream << Span<float>(input->vec, input->type);
|
2025-01-22 11:19:02 +01:00
|
|
|
return stream;
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-22 17:53:22 +02:00
|
|
|
namespace blender::gpu::shader {
|
|
|
|
|
/* Needed to use the << operators from nested namespaces. :(
|
|
|
|
|
* https://stackoverflow.com/questions/5195512/namespaces-and-operator-resolution */
|
|
|
|
|
using ::operator<<;
|
|
|
|
|
} // namespace blender::gpu::shader
|
|
|
|
|
|
2022-04-14 18:47:58 +02:00
|
|
|
/** \} */
|
|
|
|
|
|
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
|
|
|
/** \name GLSL code generation
|
|
|
|
|
* \{ */
|
|
|
|
|
|
2025-05-22 17:53:22 +02:00
|
|
|
const char *GPUCodegenCreateInfo::NameBuffer::append_sampler_name(const char name[32])
|
|
|
|
|
{
|
|
|
|
|
auto index = sampler_names.size();
|
|
|
|
|
sampler_names.append(std::make_unique<NameEntry>());
|
|
|
|
|
char *name_buffer = sampler_names[index]->data();
|
|
|
|
|
memcpy(name_buffer, name, 32);
|
|
|
|
|
return name_buffer;
|
|
|
|
|
}
|
2023-02-14 21:51:03 +01:00
|
|
|
|
2025-05-22 17:53:22 +02:00
|
|
|
GPUCodegen::GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name)
    : mat(*mat_), graph(*graph_)
{
  /* Seed the codegen hash with the material's UUID and flags. */
  BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat));
  BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat));

  create_info = MEM_new<GPUCodegenCreateInfo>(__func__, debug_name);
  /* Expose the create-info through the opaque C handle. Upcast to the base
   * class first so the reinterpret_cast starts from the correct address. */
  ShaderCreateInfo *base_info = static_cast<ShaderCreateInfo *>(create_info);
  output.create_info = reinterpret_cast<GPUShaderCreateInfo *>(base_info);
}
|
2022-04-14 18:47:58 +02:00
|
|
|
|
2025-05-22 17:53:22 +02:00
|
|
|
GPUCodegen::~GPUCodegen()
{
  /* `cryptomatte_input_` is allocated by generate_cryptomatte(); may be null. */
  MEM_SAFE_FREE(cryptomatte_input_);
  MEM_delete(create_info);
  /* Frees only the LinkData wrappers; the referenced GPUInputs are owned by
   * the node graph (or by `cryptomatte_input_`, freed above). */
  BLI_freelistN(&ubo_inputs_);
} /* Removed stray `;` after the body (empty declaration, -Wextra-semi). */
|
|
|
|
|
|
2025-05-22 17:53:22 +02:00
|
|
|
bool GPUCodegen::should_optimize_heuristic() const
|
|
|
|
|
{
|
|
|
|
|
/* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure
|
|
|
|
|
* the baseline is met. */
|
|
|
|
|
bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
|
|
|
|
|
(textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
|
|
|
|
|
return do_optimize;
|
|
|
|
|
}
|
|
|
|
|
|
2022-04-14 18:47:58 +02:00
|
|
|
/* Generate the vertex inputs and the vertex->fragment interface used by
 * material attributes, plus the GLSL snippet (`output.attr_load`) that loads
 * each attribute into the `var_attrs` interface block. */
void GPUCodegen::generate_attribs()
{
  if (BLI_listbase_is_empty(&graph.attributes)) {
    /* No attributes: ensure no stale load code remains. */
    output.attr_load.clear();
    return;
  }

  GPUCodegenCreateInfo &info = *create_info;

  info.interface_generated = MEM_new<StageInterfaceInfo>(__func__, "codegen_iface", "var_attrs");
  StageInterfaceInfo &iface = *info.interface_generated;
  info.vertex_out(iface);

  /* Input declaration, loading / assignment to interface and geometry shader passthrough. */
  std::stringstream load_ss;

  /* Index of the attribute as ordered in graph.attributes. */
  int attr_n = 0;
  /* Vertex input slots are handed out from the highest index downwards.
   * NOTE(review): presumably to keep low slots free for other vertex inputs --
   * confirm against the shader create-info conventions. */
  int slot = 15;
  LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) {
    if (slot == -1) {
      /* All 16 slots consumed; remaining attributes are dropped. */
      BLI_assert_msg(0, "Too many attributes");
      break;
    }
    /* Names are stored inside `info.name_buffer` so the StringRefNull views
     * below stay valid for the create-info's lifetime. */
    STRNCPY(info.name_buffer.attr_names[slot], attr->input_name);
    SNPRINTF(info.name_buffer.var_names[slot], "v%d", attr->id);

    StringRefNull attr_name = info.name_buffer.attr_names[slot];
    StringRefNull var_name = info.name_buffer.var_names[slot];

    /* `input_type` is the vertex input type, `iface_type` the type passed to
     * the fragment stage; they differ only for ORCO. */
    GPUType input_type, iface_type;

    load_ss << "var_attrs." << var_name;
    if (attr->is_hair_length || attr->is_hair_intercept) {
      iface_type = input_type = GPU_FLOAT;
      load_ss << " = attr_load_" << input_type << "(domain, " << attr_name << ", " << attr_n
              << ");\n";
    }
    else {
      switch (attr->type) {
        case CD_ORCO:
          /* Need vec4 to detect usage of default attribute. */
          input_type = GPU_VEC4;
          iface_type = GPU_VEC3;
          load_ss << " = attr_load_orco(domain, " << attr_name << ", " << attr_n << ");\n";
          break;
        case CD_TANGENT:
          iface_type = input_type = GPU_VEC4;
          load_ss << " = attr_load_tangent(domain, " << attr_name << ", " << attr_n << ");\n";
          break;
        default:
          iface_type = input_type = GPU_VEC4;
          load_ss << " = attr_load_" << input_type << "(domain, " << attr_name << ", " << attr_n
                  << ");\n";
          break;
      }
    }
    attr_n++;

    /* Declare the vertex input at the current slot and pass the (possibly
     * narrower) interface type through to the fragment stage. */
    info.vertex_in(slot--, to_type(input_type), attr_name);
    iface.smooth(to_type(iface_type), var_name);
  }

  output.attr_load = load_ss.str();
}
|
|
|
|
|
|
|
|
|
|
/* Declare all shader resources (samplers, node-tree UBO, uniform-attribute
 * UBO, layer attributes) on the create-info, and generate the matching GLSL
 * struct definitions into `info.typedef_source_generated`. */
void GPUCodegen::generate_resources()
{
  GPUCodegenCreateInfo &info = *create_info;

  std::stringstream ss;

  /* Textures. */
  int slot = 0;
  LISTBASE_FOREACH (GPUMaterialTexture *, tex, &graph.textures) {
    if (tex->colorband) {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::Float1DArray, name, Frequency::BATCH);
    }
    else if (tex->sky) {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      /* NOTE(review): sky textures bind at fixed slot 0 and do not advance
       * `slot` -- presumably a reserved binding; confirm this cannot collide
       * with another sampler assigned slot 0. */
      info.sampler(0, ImageType::Float2DArray, name, Frequency::BATCH);
    }
    else if (tex->tiled_mapping_name[0] != '\0') {
      /* UDIM tiles need two samplers: the tile array and its mapping table. */
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::Float2DArray, name, Frequency::BATCH);

      const char *name_mapping = info.name_buffer.append_sampler_name(tex->tiled_mapping_name);
      info.sampler(slot++, ImageType::Float1DArray, name_mapping, Frequency::BATCH);
    }
    else {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::Float2D, name, Frequency::BATCH);
    }
  }

  /* Increment heuristic. */
  textures_total_ = slot;

  if (!BLI_listbase_is_empty(&ubo_inputs_)) {
    /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
    ss << "struct NodeTree {\n";
    LISTBASE_FOREACH (LinkData *, link, &ubo_inputs_) {
      GPUInput *input = (GPUInput *)(link->data);
      if (input->source == GPU_SOURCE_CRYPTOMATTE) {
        ss << input->type << " crypto_hash;\n";
      }
      else {
        /* Member name must match the `node_tree.u<id>[b]` reference emitted by
         * the GPUInput stream operator. */
        ss << input->type << " u" << input->id << (input->is_duplicate ? "b" : "") << ";\n";
      }
    }
    ss << "};\n\n";

    info.uniform_buf(GPU_NODE_TREE_UBO_SLOT, "NodeTree", GPU_UBO_BLOCK_NAME, Frequency::BATCH);
  }

  if (!BLI_listbase_is_empty(&graph.uniform_attrs.list)) {
    ss << "struct UniformAttrs {\n";
    LISTBASE_FOREACH (GPUUniformAttr *, attr, &graph.uniform_attrs.list) {
      ss << "vec4 attr" << attr->id << ";\n";
    }
    ss << "};\n\n";

    /* TODO(fclem): Use the macro for length. Currently not working for EEVEE. */
    /* DRW_RESOURCE_CHUNK_LEN = 512 */
    info.uniform_buf(2, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH);
  }

  if (!BLI_listbase_is_empty(&graph.layer_attrs)) {
    info.additional_info("draw_layer_attributes");
  }

  info.typedef_source_generated = ss.str();
}
|
|
|
|
|
|
2025-09-22 10:24:10 +02:00
|
|
|
/* Emit the GLSL statements evaluating one node: declare its constants and
 * output temporaries, then write the call to the node's library function.
 * The function name is also recorded into `used_libraries` so the required
 * library sources are pulled in. */
void GPUCodegen::node_serialize(Set<StringRefNull> &used_libraries,
                                std::stringstream &eval_ss,
                                const GPUNode *node)
{
  gpu_material_library_use_function(used_libraries, node->name);

  /* Emit a reference to an input coming from an attribute or another node's
   * output, wrapping it in a type-conversion expression when types differ. */
  auto source_reference = [&](GPUInput *input) {
    BLI_assert(ELEM(input->source, GPU_SOURCE_OUTPUT, GPU_SOURCE_ATTR));
    /* These inputs can have non matching types. Do conversion. */
    GPUType to = input->type;
    GPUType from = (input->source == GPU_SOURCE_ATTR) ? input->attr->gputype :
                                                        input->link->output->type;
    if (from != to) {
      /* Use defines declared inside codegen_lib (e.g. vec4_from_float). */
      eval_ss << to << "_from_" << from << "(";
    }

    if (input->source == GPU_SOURCE_ATTR) {
      eval_ss << input;
    }
    else {
      eval_ss << input->link->output;
    }

    if (from != to) {
      /* Special case that needs luminance coefficients as argument. */
      if (from == GPU_VEC4 && to == GPU_FLOAT) {
        float coefficients[3];
        IMB_colormanagement_get_luminance_coefficients(coefficients);
        eval_ss << ", " << blender::Span<float>(coefficients, 3);
      }
      eval_ss << ")";
    }
  };

  /* Declare constants. */
  LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
    /* Declaration type prefix; empty for duplicated zone io variables so they
     * are not declared a second time. */
    auto type = [&]() {
      /* Don't declare zone io variables twice. */
      std::stringstream ss;
      if (!input->is_duplicate) {
        ss << input->type;
      }
      return ss.str();
    };
    switch (input->source) {
      case GPU_SOURCE_FUNCTION_CALL:
        /* NOTE(review): `function_call` is streamed right before the input and
         * a closing parenthesis, so it presumably contains the call text up to
         * its last argument -- confirm where `function_call` is built. */
        eval_ss << type() << " " << input << "; " << input->function_call << input << ");\n";
        break;
      case GPU_SOURCE_STRUCT:
        eval_ss << input->type << " " << input << " = CLOSURE_DEFAULT;\n";
        break;
      case GPU_SOURCE_CONSTANT:
        if (!input->is_duplicate) {
          /* Cast to GPUConstant selects the constructor-printing overload. */
          eval_ss << type() << " " << input << " = " << (GPUConstant *)input << ";\n";
        }
        break;
      case GPU_SOURCE_OUTPUT:
      case GPU_SOURCE_ATTR:
        if (input->is_zone_io) {
          /* Materialize the zone io value into its shared `zone<id>` name. */
          eval_ss << type() << " " << input << " = ";
          source_reference(input);
          eval_ss << ";\n";
        }
        break;
      default:
        if (input->is_zone_io && (!input->is_duplicate || !input->link)) {
          eval_ss << type() << " zone" << input->id << " = " << input << ";\n";
        }
        break;
    }
  }
  /* Declare temporary variables for node output storage. */
  LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
    /* Zone io outputs come after regular ones in the list, so stop at the
     * first one (they are declared through the inputs above). */
    if (output->is_zone_io) {
      break;
    }
    eval_ss << output->type << " " << output << ";\n";
  }

  /* Function call. */
  eval_ss << node->name << "(";
  /* Input arguments. */
  LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
    if (input->is_zone_io) {
      break;
    }
    switch (input->source) {
      case GPU_SOURCE_OUTPUT:
      case GPU_SOURCE_ATTR: {
        source_reference(input);
        break;
      }
      default:
        eval_ss << input;
        break;
    }
    /* Emit a separator unless this is the last argument overall (no further
     * non-zone input and no non-zone output to follow). */
    GPUOutput *output = static_cast<GPUOutput *>(node->outputs.first);
    if ((input->next && !input->next->is_zone_io) || (output && !output->is_zone_io)) {
      eval_ss << ", ";
    }
  }
  /* Output arguments. */
  LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
    if (output->is_zone_io) {
      break;
    }
    eval_ss << output;
    if (output->next && !output->next->is_zone_io) {
      eval_ss << ", ";
    }
  }
  eval_ss << ");\n\n";

  /* Increment heuristic. */
  nodes_total_++;
}
|
|
|
|
|
|
2025-09-22 10:24:10 +02:00
|
|
|
/* Flatten a set into a deterministically ordered vector. Sorting removes the
 * hash-order randomness that would break shader caching (see #108289). */
static Vector<StringRefNull> set_to_vector_stable(Set<StringRefNull> &set)
{
  Vector<StringRefNull> result;
  result.reserve(set.size());
  for (const StringRefNull &name : set) {
    result.append(name);
  }
  std::sort(result.begin(), result.end());
  return result;
}
|
|
|
|
|
|
|
|
|
|
GPUGraphOutput GPUCodegen::graph_serialize(GPUNodeTag tree_tag,
                                           GPUNodeLink *output_link,
                                           const char *output_default)
{
  /* Without a linked output and without a fallback there is nothing to emit. */
  if (output_link == nullptr && output_default == nullptr) {
    return {};
  }

  std::stringstream eval_ss;
  Set<StringRefNull> used_libraries;
  int num_serialized = 0;
  /* NOTE: The node order is already top to bottom (or left to right in node editor)
   * because of the evaluation order inside ntreeExecGPUNodes(). */
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    if (node->tag & tree_tag) {
      node_serialize(used_libraries, eval_ss, node);
      num_serialized++;
    }
  }

  if (num_serialized == 0) {
    return {};
  }

  if (output_link != nullptr) {
    eval_ss << "return " << output_link->output << ";\n";
  }
  else {
    /* Default output in case there are only AOVs. */
    eval_ss << "return " << output_default << ";\n";
  }

  /* Fold the generated code into the codegen hash. */
  std::string code = eval_ss.str();
  BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(code.c_str()), code.size());
  return {code, set_to_vector_stable(used_libraries)};
}
|
|
|
|
|
|
2025-09-22 10:24:10 +02:00
|
|
|
GPUGraphOutput GPUCodegen::graph_serialize(GPUNodeTag tree_tag)
{
  Set<StringRefNull> used_libraries;
  std::stringstream eval_ss;
  /* Serialize every node carrying the requested tag, in graph order. */
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    if ((node->tag & tree_tag) == 0) {
      continue;
    }
    node_serialize(used_libraries, eval_ss, node);
  }
  /* Fold the generated code into the codegen hash. */
  std::string code = eval_ss.str();
  BLI_hash_mm2a_add(&hm2a_, reinterpret_cast<const uchar *>(code.c_str()), code.size());
  return {code, set_to_vector_stable(used_libraries)};
}
|
|
|
|
|
|
2022-09-13 11:07:30 +02:00
|
|
|
void GPUCodegen::generate_cryptomatte()
|
|
|
|
|
{
|
2025-04-12 17:17:24 +02:00
|
|
|
cryptomatte_input_ = MEM_callocN<GPUInput>(__func__);
|
2022-09-13 11:07:30 +02:00
|
|
|
cryptomatte_input_->type = GPU_FLOAT;
|
|
|
|
|
cryptomatte_input_->source = GPU_SOURCE_CRYPTOMATTE;
|
|
|
|
|
|
|
|
|
|
float material_hash = 0.0f;
|
|
|
|
|
Material *material = GPU_material_get_material(&mat);
|
|
|
|
|
if (material) {
|
2025-05-22 17:53:22 +02:00
|
|
|
bke::cryptomatte::CryptomatteHash hash(material->id.name + 2,
|
|
|
|
|
BLI_strnlen(material->id.name + 2, MAX_NAME - 2));
|
2022-09-13 11:07:30 +02:00
|
|
|
material_hash = hash.float_encoded();
|
|
|
|
|
}
|
|
|
|
|
cryptomatte_input_->vec[0] = material_hash;
|
|
|
|
|
|
|
|
|
|
BLI_addtail(&ubo_inputs_, BLI_genericNodeN(cryptomatte_input_));
|
|
|
|
|
}
|
|
|
|
|
|
2022-04-14 18:47:58 +02:00
|
|
|
void GPUCodegen::generate_uniform_buffer()
|
|
|
|
|
{
|
|
|
|
|
/* Extract uniform inputs. */
|
|
|
|
|
LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
|
|
|
|
|
LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
|
|
|
|
|
if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
|
|
|
|
|
/* We handle the UBO uniforms separately. */
|
|
|
|
|
BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
|
2023-02-14 21:51:03 +01:00
|
|
|
uniforms_total_++;
|
2022-04-14 18:47:58 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!BLI_listbase_is_empty(&ubo_inputs_)) {
|
|
|
|
|
/* This sorts the inputs based on size. */
|
|
|
|
|
GPU_material_uniform_buffer_create(&mat, &ubo_inputs_);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Sets id for unique names for all inputs, resources and temp variables. */
void GPUCodegen::set_unique_ids()
{
  /* Zone start/end nodes indexed by `zone_index`, so matching pairs can be
   * joined after the main numbering pass. */
  blender::Map<int, GPUNode *> zone_starts;
  blender::Map<int, GPUNode *> zone_ends;

  /* First pass: assign a unique, increasing id to every socket. */
  int id = 1;
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
      input->id = id++;
    }
    LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
      output->id = id++;
    }
    if (node->zone_index != -1) {
      auto &map = node->is_zone_end ? zone_ends : zone_starts;
      map.add(node->zone_index, node);
    }
  }

  /* Advance to the first zone-io socket of a list (or its last element when
   * none is flagged). Works on both GPUInput* and GPUOutput* lists. */
  auto find_zone_io = [](auto first) {
    while (first && !first->is_zone_io && first->next) {
      first = first->next;
    }
    return first;
  };

  /* Assign the same id to inputs and outputs of start and end zones. */
  for (GPUNode *end : zone_ends.values()) {

    GPUInput *end_input = find_zone_io((GPUInput *)end->inputs.first);
    GPUOutput *end_output = find_zone_io((GPUOutput *)end->outputs.first);

    GPUNode *start = zone_starts.lookup(end->zone_index);

    GPUInput *start_input = find_zone_io((GPUInput *)start->inputs.first);
    GPUOutput *start_output = find_zone_io((GPUOutput *)start->outputs.first);

    /* Walk the four zone-io socket lists in lockstep, propagating the start
     * input's id to the three mirrored sockets.
     * NOTE(review): this assumes all four zone-io tails have the same length;
     * a shorter list would dereference null -- confirm that zone node
     * construction guarantees matching socket counts. */
    for (; start_input; start_input = start_input->next,
                        start_output = start_output->next,
                        end_input = end_input->next,
                        end_output = end_output->next)
    {
      start_output->id = start_input->id;
      end_input->id = start_input->id;
      end_output->id = start_input->id;
    }
  }
}
|
|
|
|
|
|
|
|
|
|
void GPUCodegen::generate_graphs()
|
|
|
|
|
{
|
|
|
|
|
set_unique_ids();
|
|
|
|
|
|
2023-07-10 12:41:44 +02:00
|
|
|
output.surface = graph_serialize(
|
|
|
|
|
GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface, "CLOSURE_DEFAULT");
|
|
|
|
|
output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume, "CLOSURE_DEFAULT");
|
|
|
|
|
output.displacement = graph_serialize(
|
|
|
|
|
GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement, nullptr);
|
|
|
|
|
output.thickness = graph_serialize(GPU_NODE_TAG_THICKNESS, graph.outlink_thickness, nullptr);
|
2022-07-29 08:37:57 +02:00
|
|
|
if (!BLI_listbase_is_empty(&graph.outlink_compositor)) {
|
|
|
|
|
output.composite = graph_serialize(GPU_NODE_TAG_COMPOSITOR);
|
|
|
|
|
}
|
2022-04-14 18:47:58 +02:00
|
|
|
|
|
|
|
|
if (!BLI_listbase_is_empty(&graph.material_functions)) {
|
|
|
|
|
LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, func_link, &graph.material_functions) {
|
2025-09-22 10:24:10 +02:00
|
|
|
std::stringstream eval_ss;
|
2022-11-14 12:21:37 +01:00
|
|
|
/* Untag every node in the graph to avoid serializing nodes from other functions */
|
|
|
|
|
LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
|
|
|
|
|
node->tag &= ~GPU_NODE_TAG_FUNCTION;
|
|
|
|
|
}
|
|
|
|
|
/* Tag only the nodes needed for the current function */
|
2025-10-08 16:38:14 +02:00
|
|
|
gpu_nodes_tag(&graph, func_link->outlink, GPU_NODE_TAG_FUNCTION);
|
2025-09-22 10:24:10 +02:00
|
|
|
GPUGraphOutput graph = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink);
|
|
|
|
|
eval_ss << "float " << func_link->name << "() {\n" << graph.serialized << "}\n\n";
|
|
|
|
|
output.material_functions.append({eval_ss.str(), graph.dependencies});
|
2022-04-14 18:47:58 +02:00
|
|
|
}
|
2022-11-14 12:21:37 +01:00
|
|
|
/* Leave the function tags as they were before serialization */
|
|
|
|
|
LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, funclink, &graph.material_functions) {
|
2025-10-08 16:38:14 +02:00
|
|
|
gpu_nodes_tag(&graph, funclink->outlink, GPU_NODE_TAG_FUNCTION);
|
2022-11-14 12:21:37 +01:00
|
|
|
}
|
2022-04-14 18:47:58 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) {
|
|
|
|
|
BLI_hash_mm2a_add(&hm2a_, (uchar *)attr->name, strlen(attr->name));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
hash_ = BLI_hash_mm2a_end(&hm2a_);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** \} */
|