/* SPDX-FileCopyrightText: 2005 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 *
 * Convert material node-trees to GLSL.
 */

#include "MEM_guardedalloc.h"

#include "DNA_customdata_types.h"
#include "DNA_image_types.h"
#include "DNA_material_types.h"

#include "BLI_ghash.h"
#include "BLI_hash_mm2a.h"
#include "BLI_link_utils.h"
#include "BLI_listbase.h"
#include "BLI_string.h"
#include "BLI_threads.h"
#include "BLI_utildefines.h"

#include "PIL_time.h"

#include "BKE_cryptomatte.hh"
#include "BKE_material.h"

#include "GPU_capabilities.h"
#include "GPU_context.h"
#include "GPU_material.h"
#include "GPU_shader.h"
#include "GPU_uniform_buffer.h"
#include "GPU_vertex_format.h"

#include "BLI_sys_types.h" /* for intptr_t support */
#include "BLI_vector.hh"

#include "gpu_codegen.h"
#include "gpu_node_graph.h"
#include "gpu_shader_create_info.hh"
#include "gpu_shader_dependency_private.h"

#include <cstdarg>
#include <cstring>
#include <sstream>
#include <string>

using namespace blender::gpu::shader;

/**
 * IMPORTANT: Never add external references. The GPUMaterial used to create the GPUPass (and its
 * GPUCodegenCreateInfo) can be freed before actually compiling. This happens if there is an
 * update before deferred compilation happens and the GPUPass gets picked up by another
 * GPUMaterial (because of GPUPass reuse).
 */
struct GPUCodegenCreateInfo : ShaderCreateInfo {
  struct NameBuffer {
    using NameEntry = std::array<char, 32>;

    /** Duplicate attribute names to avoid referencing the GPUNodeGraph directly. */
    char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1];
    char var_names[16][8];
    blender::Vector<std::unique_ptr<NameEntry>, 16> sampler_names;

    /* Returns the memory location of the appended name. */
    const char *append_sampler_name(const char name[32])
    {
      auto index = sampler_names.size();
      sampler_names.append(std::make_unique<NameEntry>());
      char *name_buffer = sampler_names[index]->data();
      memcpy(name_buffer, name, 32);
      return name_buffer;
    }
  };

  /** Optional generated interface. */
  StageInterfaceInfo *interface_generated = nullptr;
  /** Optional name buffer containing names referenced by StringRefNull. */
  NameBuffer name_buffer;

  GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name){};
  ~GPUCodegenCreateInfo()
  {
    delete interface_generated;
  };
};

struct GPUPass {
  GPUPass *next;

  GPUShader *shader;
  GPUCodegenCreateInfo *create_info = nullptr;
  /** Orphaned GPUPasses get freed by the garbage collector. */
  uint refcount;
  /** The last time the refcount was greater than 0. */
  int gc_timestamp;
  /** Identity hash generated from all GLSL code. */
  uint32_t hash;
  /** Whether we already tried to compile the attached GPUShader. */
  bool compiled;
  /** Hint that an optimized variant of this pass should be created based on a complexity
   * heuristic during pass code generation. */
  bool should_optimize;
  /** Whether the pass is in the GPUPass cache. */
  bool cached;
};

/* -------------------------------------------------------------------- */
/** \name GPUPass Cache
 *
 * Internal shader cache: This prevents shader recompilation / stalls when
 * using undo/redo AND also allows for GPUPass reuse if the shader code is the
 * same for two different materials. Unused GPUPasses are freed by garbage collection.
 * \{ */

/* Only use one linked list that contains the GPUPasses grouped by hash. */
static GPUPass *pass_cache = nullptr;
static SpinLock pass_cache_spin;

/* Search by hash only. Return first pass with the same hash.
 * There is hash collision if (pass->next && pass->next->hash == hash) */
static GPUPass *gpu_pass_cache_lookup(uint32_t hash)
{
  BLI_spin_lock(&pass_cache_spin);
  /* Could be optimized with a Lookup table. */
  for (GPUPass *pass = pass_cache; pass; pass = pass->next) {
    if (pass->hash == hash) {
      BLI_spin_unlock(&pass_cache_spin);
      return pass;
    }
  }
  BLI_spin_unlock(&pass_cache_spin);
  return nullptr;
}

static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass)
{
  BLI_spin_lock(&pass_cache_spin);
  pass->cached = true;
  if (node != nullptr) {
    /* Add after the first pass having the same hash. */
    pass->next = node->next;
    node->next = pass;
  }
  else {
    /* No other pass has the same hash, just prepend to the list. */
    BLI_LINKS_PREPEND(pass_cache, pass);
  }
  BLI_spin_unlock(&pass_cache_spin);
}

/* Check all possible passes with the same hash. */
static GPUPass *gpu_pass_cache_resolve_collision(GPUPass *pass,
                                                 GPUShaderCreateInfo *info,
                                                 uint32_t hash)
{
  BLI_spin_lock(&pass_cache_spin);
  for (; pass && (pass->hash == hash); pass = pass->next) {
    if (*reinterpret_cast<ShaderCreateInfo *>(info) ==
        *reinterpret_cast<ShaderCreateInfo *>(pass->create_info))
    {
      BLI_spin_unlock(&pass_cache_spin);
      return pass;
    }
  }
  BLI_spin_unlock(&pass_cache_spin);
  return nullptr;
}

static bool gpu_pass_is_valid(GPUPass *pass)
{
  /* Shader is not null if compilation is successful. */
  return (pass->compiled == false || pass->shader != nullptr);
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Type > string conversion
 * \{ */
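
/* Debug toggle: enabling the first branch below makes generated variable names embed the node
 * name and socket index, which is easier to read when inspecting the generated GLSL. The default
 * branch keeps only the short type prefix ("tmp", "cons", ...) followed by the unique id. */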
#if 0
#  define SRC_NAME(io, link, list, type) \
    link->node->name << "_" << io << BLI_findindex(&link->node->list, (const void *)link) << "_" \
                     << type
#else
#  define SRC_NAME(io, list, link, type) type
#endif

static std::ostream &operator<<(std::ostream &stream, const GPUInput *input)
{
  switch (input->source) {
    case GPU_SOURCE_FUNCTION_CALL:
    case GPU_SOURCE_OUTPUT:
      return stream << SRC_NAME("in", input, inputs, "tmp") << input->id;
    case GPU_SOURCE_CONSTANT:
      return stream << SRC_NAME("in", input, inputs, "cons") << input->id;
    case GPU_SOURCE_UNIFORM:
      return stream << "node_tree.u" << input->id;
    case GPU_SOURCE_ATTR:
      return stream << "var_attrs.v" << input->attr->id;
    case GPU_SOURCE_UNIFORM_ATTR:
      return stream << "UNI_ATTR(unf_attrs[resource_id].attr" << input->uniform_attr->id << ")";
    case GPU_SOURCE_LAYER_ATTR:
      return stream << "attr_load_layer(" << input->layer_attr->hash_code << ")";
    case GPU_SOURCE_STRUCT:
      return stream << "strct" << input->id;
    case GPU_SOURCE_TEX:
      return stream << input->texture->sampler_name;
    case GPU_SOURCE_TEX_TILED_MAPPING:
      return stream << input->texture->tiled_mapping_name;
    default:
      BLI_assert(0);
      return stream;
  }
}

static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
{
  return stream << SRC_NAME("out", output, outputs, "tmp") << output->id;
}

/* Trick type to change overload and keep a somewhat nice syntax. */
struct GPUConstant : public GPUInput {
};

/* Print the data constructor (e.g. vec2(1.0f, 1.0f)). */
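/* For example, the float constant 1.0f is printed as float(uintBitsToFloat(1065353216u)), which
 * reproduces the exact bit pattern in the generated GLSL. */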
static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)
{
  stream << input->type << "(";
  for (int i = 0; i < input->type; i++) {
    char formatted_float[32];
    /* Use uint representation to allow exact same bit pattern even if NaN. This is because we can
     * pass UINTs as floats for constants. */
    const uint32_t *uint_vec = reinterpret_cast<const uint32_t *>(input->vec);
    SNPRINTF(formatted_float, "uintBitsToFloat(%uu)", uint_vec[i]);
    stream << formatted_float;
    if (i < input->type - 1) {
      stream << ", ";
    }
  }
  stream << ")";
  return stream;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name GLSL code generation
 * \{ */

class GPUCodegen {
 public:
  GPUMaterial &mat;
  GPUNodeGraph &graph;
  GPUCodegenOutput output = {};
  GPUCodegenCreateInfo *create_info = nullptr;

 private:
  uint32_t hash_ = 0;
  BLI_HashMurmur2A hm2a_;
  ListBase ubo_inputs_ = {nullptr, nullptr};
  GPUInput *cryptomatte_input_ = nullptr;

  /** Cache parameters for complexity heuristic. */
  uint nodes_total_ = 0;
  uint textures_total_ = 0;
  uint uniforms_total_ = 0;

 public:
  GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
  {
    BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat));
    BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat));
    create_info = new GPUCodegenCreateInfo("codegen");
    output.create_info = reinterpret_cast<GPUShaderCreateInfo *>(
        static_cast<ShaderCreateInfo *>(create_info));

    if (GPU_material_flag_get(mat_, GPU_MATFLAG_OBJECT_INFO)) {
      create_info->additional_info("draw_object_infos");
    }
  }

  ~GPUCodegen()
  {
    MEM_SAFE_FREE(output.attr_load);
    MEM_SAFE_FREE(output.surface);
    MEM_SAFE_FREE(output.volume);
    MEM_SAFE_FREE(output.thickness);
    MEM_SAFE_FREE(output.displacement);
    MEM_SAFE_FREE(output.composite);
    MEM_SAFE_FREE(output.material_functions);
    MEM_SAFE_FREE(cryptomatte_input_);
    delete create_info;
    BLI_freelistN(&ubo_inputs_);
  };

  void generate_graphs();
  void generate_cryptomatte();
  void generate_uniform_buffer();
  void generate_attribs();
  void generate_resources();
  void generate_library();

  uint32_t hash_get() const
  {
    return hash_;
  }

  /* Heuristic determined during pass codegen for whether a
   * more optimal variant of this material should be compiled. */
  bool should_optimize_heuristic() const
  {
    /* Optimize if any of the maximum counts is exceeded, but also ensure
     * the baseline counts are met. */
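    /* For example, a graph with 10 nodes, 5 textures and 12 uniforms exceeds the texture
     * threshold and meets every baseline count, so it is flagged for optimization. */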
    bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
                       (textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
    return do_optimize;
  }

 private:
  void set_unique_ids();

  void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
  char *graph_serialize(eGPUNodeTag tree_tag,
                        GPUNodeLink *output_link,
                        const char *output_default = nullptr);
  char *graph_serialize(eGPUNodeTag tree_tag);

  static char *extract_c_str(std::stringstream &stream)
  {
    auto string = stream.str();
    return BLI_strdup(string.c_str());
  }
};

void GPUCodegen::generate_attribs()
{
  if (BLI_listbase_is_empty(&graph.attributes)) {
    output.attr_load = nullptr;
    return;
  }

  GPUCodegenCreateInfo &info = *create_info;

  info.interface_generated = new StageInterfaceInfo("codegen_iface", "var_attrs");
  StageInterfaceInfo &iface = *info.interface_generated;
  info.vertex_out(iface);

  /* Input declaration, loading / assignment to interface and geometry shader passthrough. */
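  /* Each attribute emits one load line, roughly `var_attrs.v3 = attr_load_vec4(<attr_name>);`,
   * with the exact load function chosen from the attribute type below. */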
  std::stringstream load_ss;

  int slot = 15;
  LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) {
    if (slot == -1) {
      BLI_assert_msg(0, "Too many attributes");
      break;
    }
    STRNCPY(info.name_buffer.attr_names[slot], attr->input_name);
    SNPRINTF(info.name_buffer.var_names[slot], "v%d", attr->id);

    blender::StringRefNull attr_name = info.name_buffer.attr_names[slot];
    blender::StringRefNull var_name = info.name_buffer.var_names[slot];

    eGPUType input_type, iface_type;

    load_ss << "var_attrs." << var_name;
    if (attr->is_hair_length) {
      iface_type = input_type = GPU_FLOAT;
      load_ss << " = attr_load_" << input_type << "(" << attr_name << ");\n";
    }
    else {
      switch (attr->type) {
        case CD_ORCO:
          /* Need vec4 to detect usage of default attribute. */
          input_type = GPU_VEC4;
          iface_type = GPU_VEC3;
          load_ss << " = attr_load_orco(" << attr_name << ");\n";
          break;
        case CD_TANGENT:
          iface_type = input_type = GPU_VEC4;
          load_ss << " = attr_load_tangent(" << attr_name << ");\n";
          break;
        default:
          iface_type = input_type = GPU_VEC4;
          load_ss << " = attr_load_" << input_type << "(" << attr_name << ");\n";
          break;
      }
    }

    info.vertex_in(slot--, to_type(input_type), attr_name);
    iface.smooth(to_type(iface_type), var_name);
  }

  output.attr_load = extract_c_str(load_ss);
}

void GPUCodegen::generate_resources()
{
  GPUCodegenCreateInfo &info = *create_info;

  /* Ref. #98190: Defines are optimizations for old compilers.
   * Might become unnecessary with EEVEE-Next. */
  if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_COAT)) {
    info.define("PRINCIPLED_COAT");
  }
  if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_METALLIC)) {
    info.define("PRINCIPLED_METALLIC");
  }
  if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_DIELECTRIC)) {
    info.define("PRINCIPLED_DIELECTRIC");
  }
  if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_GLASS)) {
    info.define("PRINCIPLED_GLASS");
  }
  if (GPU_material_flag_get(&mat, GPU_MATFLAG_PRINCIPLED_ANY)) {
    info.define("PRINCIPLED_ANY");
  }

  std::stringstream ss;

  /* Textures. */
  int slot = 0;
  LISTBASE_FOREACH (GPUMaterialTexture *, tex, &graph.textures) {
    if (tex->colorband) {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::FLOAT_1D_ARRAY, name, Frequency::BATCH);
    }
    else if (tex->sky) {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(0, ImageType::FLOAT_2D_ARRAY, name, Frequency::BATCH);
    }
    else if (tex->tiled_mapping_name[0] != '\0') {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::FLOAT_2D_ARRAY, name, Frequency::BATCH);

      const char *name_mapping = info.name_buffer.append_sampler_name(tex->tiled_mapping_name);
      info.sampler(slot++, ImageType::FLOAT_1D_ARRAY, name_mapping, Frequency::BATCH);
    }
    else {
      const char *name = info.name_buffer.append_sampler_name(tex->sampler_name);
      info.sampler(slot++, ImageType::FLOAT_2D, name, Frequency::BATCH);
    }
  }

  /* Increment heuristic. */
  textures_total_ = slot;

  if (!BLI_listbase_is_empty(&ubo_inputs_)) {
    /* NOTE: generate_uniform_buffer() should have sorted the inputs before this. */
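    /* The emitted typedef source then looks roughly like:
     *   struct NodeTree { vec4 u2; float u7; ... float crypto_hash; };
     * with members following the size-sorted UBO input order. */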
    ss << "struct NodeTree {\n";
    LISTBASE_FOREACH (LinkData *, link, &ubo_inputs_) {
      GPUInput *input = (GPUInput *)(link->data);
      if (input->source == GPU_SOURCE_CRYPTOMATTE) {
        ss << input->type << " crypto_hash;\n";
      }
      else {
        ss << input->type << " u" << input->id << ";\n";
      }
    }
    ss << "};\n\n";

    info.uniform_buf(GPU_NODE_TREE_UBO_SLOT, "NodeTree", GPU_UBO_BLOCK_NAME, Frequency::BATCH);
  }

  if (!BLI_listbase_is_empty(&graph.uniform_attrs.list)) {
    ss << "struct UniformAttrs {\n";
    LISTBASE_FOREACH (GPUUniformAttr *, attr, &graph.uniform_attrs.list) {
      ss << "vec4 attr" << attr->id << ";\n";
    }
    ss << "};\n\n";

    /* TODO(fclem): Use the macro for length. Currently not working for EEVEE. */
    /* DRW_RESOURCE_CHUNK_LEN = 512 */
    info.uniform_buf(2, "UniformAttrs", GPU_ATTRIBUTE_UBO_BLOCK_NAME "[512]", Frequency::BATCH);
  }

  if (!BLI_listbase_is_empty(&graph.layer_attrs)) {
    info.additional_info("draw_layer_attributes");
  }

  info.typedef_source_generated = ss.str();
}

void GPUCodegen::generate_library()
{
  GPUCodegenCreateInfo &info = *create_info;

  void *value;
  blender::Vector<std::string> source_files;

  /* Iterate over libraries. We need to keep this struct intact in case it is required for the
   * optimization pass. The first pass just collects the keys from the GSet. Since items in a GSet
   * are unordered, this can cause order differences between invocations, so we collect the keys
   * first and sort them before doing the actual work, to guarantee stable behavior while still
   * having cheap insertions into the GSet. */
  GHashIterator *ihash = BLI_ghashIterator_new((GHash *)graph.used_libraries);
  while (!BLI_ghashIterator_done(ihash)) {
    value = BLI_ghashIterator_getKey(ihash);
    source_files.append((const char *)value);
    BLI_ghashIterator_step(ihash);
  }
  BLI_ghashIterator_free(ihash);

  std::sort(source_files.begin(), source_files.end());
  for (auto &key : source_files) {
    auto deps = gpu_shader_dependency_get_resolved_source(key.c_str());
    info.dependencies_generated.extend_non_duplicates(deps);
  }
}
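
/* Serialize one node: declare its constant / struct inputs and its output temporaries, then emit
 * the function call. The result looks roughly like (names are illustrative):
 *   float cons4 = float(uintBitsToFloat(1056964608u));
 *   vec4 tmp7;
 *   node_example(cons4, tmp2, tmp7);
 */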
void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
{
  /* Declare constants. */
  LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
    switch (input->source) {
      case GPU_SOURCE_FUNCTION_CALL:
        eval_ss << input->type << " " << input << "; " << input->function_call << input << ");\n";
        break;
      case GPU_SOURCE_STRUCT:
        eval_ss << input->type << " " << input << " = CLOSURE_DEFAULT;\n";
        break;
      case GPU_SOURCE_CONSTANT:
        eval_ss << input->type << " " << input << " = " << (GPUConstant *)input << ";\n";
        break;
      default:
        break;
    }
  }
  /* Declare temporary variables for node output storage. */
  LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
    eval_ss << output->type << " " << output << ";\n";
  }

  /* Function call. */
  eval_ss << node->name << "(";
  /* Input arguments. */
  LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
    switch (input->source) {
      case GPU_SOURCE_OUTPUT:
      case GPU_SOURCE_ATTR: {
        /* These inputs can have non matching types. Do conversion. */
        eGPUType to = input->type;
        eGPUType from = (input->source == GPU_SOURCE_ATTR) ? input->attr->gputype :
                                                             input->link->output->type;
        if (from != to) {
          /* Use defines declared inside codegen_lib (i.e: vec4_from_float). */
          eval_ss << to << "_from_" << from << "(";
        }

        if (input->source == GPU_SOURCE_ATTR) {
          eval_ss << input;
        }
        else {
          eval_ss << input->link->output;
        }

        if (from != to) {
          eval_ss << ")";
        }
        break;
      }
      default:
        eval_ss << input;
        break;
    }
    eval_ss << ", ";
  }
  /* Output arguments. */
  LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
    eval_ss << output;
    if (output->next) {
      eval_ss << ", ";
    }
  }
  eval_ss << ");\n\n";

  /* Increment heuristic. */
  nodes_total_++;
}

char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag,
                                  GPUNodeLink *output_link,
                                  const char *output_default)
{
  if (output_link == nullptr && output_default == nullptr) {
    return nullptr;
  }

  std::stringstream eval_ss;
  bool has_nodes = false;
  /* NOTE: The node order is already top to bottom (or left to right in node editor)
   * because of the evaluation order inside ntreeExecGPUNodes(). */
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    if ((node->tag & tree_tag) == 0) {
      continue;
    }
    node_serialize(eval_ss, node);
    has_nodes = true;
  }

  if (!has_nodes) {
    return nullptr;
  }

  if (output_link) {
    eval_ss << "return " << output_link->output << ";\n";
  }
  else {
    /* Default output in case there are only AOVs. */
    eval_ss << "return " << output_default << ";\n";
  }

  char *eval_c_str = extract_c_str(eval_ss);
  BLI_hash_mm2a_add(&hm2a_, (uchar *)eval_c_str, eval_ss.str().size());
  return eval_c_str;
}

char *GPUCodegen::graph_serialize(eGPUNodeTag tree_tag)
{
  std::stringstream eval_ss;
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    if (node->tag & tree_tag) {
      node_serialize(eval_ss, node);
    }
  }
  char *eval_c_str = extract_c_str(eval_ss);
  BLI_hash_mm2a_add(&hm2a_, (uchar *)eval_c_str, eval_ss.str().size());
  return eval_c_str;
}

void GPUCodegen::generate_cryptomatte()
{
  cryptomatte_input_ = static_cast<GPUInput *>(MEM_callocN(sizeof(GPUInput), __func__));
  cryptomatte_input_->type = GPU_FLOAT;
  cryptomatte_input_->source = GPU_SOURCE_CRYPTOMATTE;

  float material_hash = 0.0f;
  Material *material = GPU_material_get_material(&mat);
  if (material) {
    blender::bke::cryptomatte::CryptomatteHash hash(material->id.name,
                                                    BLI_strnlen(material->id.name, MAX_NAME - 2));
    material_hash = hash.float_encoded();
  }
  cryptomatte_input_->vec[0] = material_hash;

  BLI_addtail(&ubo_inputs_, BLI_genericNodeN(cryptomatte_input_));
}

void GPUCodegen::generate_uniform_buffer()
{
  /* Extract uniform inputs. */
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
      if (input->source == GPU_SOURCE_UNIFORM && !input->link) {
        /* We handle the UBO uniforms separately. */
        BLI_addtail(&ubo_inputs_, BLI_genericNodeN(input));
        uniforms_total_++;
      }
    }
  }
  if (!BLI_listbase_is_empty(&ubo_inputs_)) {
    /* This sorts the inputs based on size. */
    GPU_material_uniform_buffer_create(&mat, &ubo_inputs_);
  }
}

/* Sets id for unique names for all inputs, resources and temp variables. */
void GPUCodegen::set_unique_ids()
{
  int id = 1;
  LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
    LISTBASE_FOREACH (GPUInput *, input, &node->inputs) {
      input->id = id++;
    }
    LISTBASE_FOREACH (GPUOutput *, output, &node->outputs) {
      output->id = id++;
    }
  }
}

void GPUCodegen::generate_graphs()
{
  set_unique_ids();

  output.surface = graph_serialize(
      GPU_NODE_TAG_SURFACE | GPU_NODE_TAG_AOV, graph.outlink_surface, "CLOSURE_DEFAULT");
  output.volume = graph_serialize(GPU_NODE_TAG_VOLUME, graph.outlink_volume, "CLOSURE_DEFAULT");
  output.displacement = graph_serialize(
      GPU_NODE_TAG_DISPLACEMENT, graph.outlink_displacement, nullptr);
  output.thickness = graph_serialize(GPU_NODE_TAG_THICKNESS, graph.outlink_thickness, nullptr);
  if (!BLI_listbase_is_empty(&graph.outlink_compositor)) {
    output.composite = graph_serialize(GPU_NODE_TAG_COMPOSITOR);
  }

  if (!BLI_listbase_is_empty(&graph.material_functions)) {
    std::stringstream eval_ss;
    eval_ss << "\n/* Generated Functions */\n\n";
    LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, func_link, &graph.material_functions) {
      /* Untag every node in the graph to avoid serializing nodes from other functions */
      LISTBASE_FOREACH (GPUNode *, node, &graph.nodes) {
        node->tag &= ~GPU_NODE_TAG_FUNCTION;
      }
      /* Tag only the nodes needed for the current function */
      gpu_nodes_tag(func_link->outlink, GPU_NODE_TAG_FUNCTION);
      char *fn = graph_serialize(GPU_NODE_TAG_FUNCTION, func_link->outlink);
      eval_ss << "float " << func_link->name << "() {\n" << fn << "}\n\n";
      MEM_SAFE_FREE(fn);
    }
    output.material_functions = extract_c_str(eval_ss);
    /* Leave the function tags as they were before serialization */
    LISTBASE_FOREACH (GPUNodeGraphFunctionLink *, funclink, &graph.material_functions) {
      gpu_nodes_tag(funclink->outlink, GPU_NODE_TAG_FUNCTION);
    }
  }

  LISTBASE_FOREACH (GPUMaterialAttribute *, attr, &graph.attributes) {
    BLI_hash_mm2a_add(&hm2a_, (uchar *)attr->name, strlen(attr->name));
  }

  hash_ = BLI_hash_mm2a_end(&hm2a_);
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name GPUPass
 * \{ */
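
/* Main entry point: prune (and optionally optimize) the node graph, generate the GLSL strings
 * and create-info, then either reuse a cached GPUPass with a matching hash / create-info or
 * allocate a new one. */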
GPUPass *GPU_generate_pass(GPUMaterial *material,
                           GPUNodeGraph *graph,
                           GPUCodegenCallbackFn finalize_source_cb,
                           void *thunk,
                           bool optimize_graph)
{
  gpu_node_graph_prune_unused(graph);

  /* If the optimize_graph flag is passed in, we are generating an optimized
   * variant of the GPUMaterial's GPUPass. */
  if (optimize_graph) {
    gpu_node_graph_optimize(graph);
  }

  /* Extract attributes before compiling so the generated VBOs are ready to accept the future
   * shader. */
  gpu_node_graph_finalize_uniform_attrs(graph);

  GPUCodegen codegen(material, graph);
  codegen.generate_graphs();
  codegen.generate_cryptomatte();

  GPUPass *pass_hash = nullptr;

  if (!optimize_graph) {
    /* The optimized version of the shader should not re-generate a UBO.
     * The UBO will not be used for this variant. */
    codegen.generate_uniform_buffer();

    /** Cache lookup: Reuse shaders already compiled.
     * NOTE: We only perform cache look-up for non-optimized shader
     * graphs, as baked constant data among other optimizations will generate too many
     * shader source permutations, with minimal re-usability. */
    pass_hash = gpu_pass_cache_lookup(codegen.hash_get());

    /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
     * there is no way to have a collision currently. Some advocated to only use a bigger hash. */
    if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
      if (!gpu_pass_is_valid(pass_hash)) {
        /* Shader has already been created but failed to compile. */
        return nullptr;
      }
      /* No collision, just return the pass. */
      BLI_spin_lock(&pass_cache_spin);
      pass_hash->refcount += 1;
      BLI_spin_unlock(&pass_cache_spin);
      return pass_hash;
    }
  }

  /* Either the shader is not compiled or there is a hash collision...
   * continue generating the shader strings. */
  codegen.generate_attribs();
  codegen.generate_resources();
  codegen.generate_library();

  /* Make engine add its own code and implement the generated functions. */
  finalize_source_cb(thunk, material, &codegen.output);

  GPUPass *pass = nullptr;
  if (pass_hash) {
    /* Cache lookup: Reuse shaders already compiled. */
    pass = gpu_pass_cache_resolve_collision(
        pass_hash, codegen.output.create_info, codegen.hash_get());
  }

  if (pass) {
    /* Cache hit. Reuse the same GPUPass and GPUShader. */
    if (!gpu_pass_is_valid(pass)) {
      /* Shader has already been created but failed to compile. */
      return nullptr;
    }
    BLI_spin_lock(&pass_cache_spin);
    pass->refcount += 1;
    BLI_spin_unlock(&pass_cache_spin);
  }
  else {
    /* We still create a pass even if shader compilation
     * fails to avoid trying to compile again and again. */
    pass = (GPUPass *)MEM_callocN(sizeof(GPUPass), "GPUPass");
    pass->shader = nullptr;
    pass->refcount = 1;
    pass->create_info = codegen.create_info;
    pass->hash = codegen.hash_get();
    pass->compiled = false;
    pass->cached = false;
    /* Only flag the pass optimization hint if this is the first generated pass for a material.
     * Optimized passes cannot be optimized further, even if the heuristic is still not
     * favorable. */
    pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();

    codegen.create_info = nullptr;

    /* Only insert non-optimized graphs into the cache.
     * Optimized graphs will continuously be recompiled with new unique source during material
     * editing, causing the cache to fill up quickly with materials offering minimal re-use. */
    if (!optimize_graph) {
      gpu_pass_cache_insert_after(pass_hash, pass);
    }
  }
  return pass;
}

bool GPU_pass_should_optimize(GPUPass *pass)
{
  /* Returns optimization heuristic prepared during
   * initial codegen.
   * NOTE: Optimization currently limited to Metal backend as repeated compilations required for
   * material specialization cause impactful CPU stalls on OpenGL platforms. */
  return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize;
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Compilation
 * \{ */

static int count_active_texture_sampler(GPUPass *pass, GPUShader *shader)
{
  int num_samplers = 0;

  for (const ShaderCreateInfo::Resource &res : pass->create_info->pass_resources_) {
    if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
      if (GPU_shader_get_uniform(shader, res.sampler.name.c_str()) != -1) {
        num_samplers += 1;
      }
    }
  }

  return num_samplers;
}

static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
{
  if (shader == nullptr) {
    return false;
  }

  /* NOTE: The only drawback of this method is that it will count a sampler
   * used in the fragment shader and only declared (but not used) in the vertex
   * shader as used by both. But this corner case is not happening for now. */
  int active_samplers_len = count_active_texture_sampler(pass, shader);

  /* Validate against opengl limit. */
  if ((active_samplers_len > GPU_max_textures_frag()) ||
      (active_samplers_len > GPU_max_textures_vert()))
  {
    return false;
  }

  if (pass->create_info->geometry_source_.is_empty() == false) {
    if (active_samplers_len > GPU_max_textures_geom()) {
      return false;
    }
  }

  return (active_samplers_len * 3 <= GPU_max_textures());
}

bool GPU_pass_compile(GPUPass *pass, const char *shname)
{
  bool success = true;
  if (!pass->compiled) {
    GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
        static_cast<ShaderCreateInfo *>(pass->create_info));

    pass->create_info->name_ = shname;

    GPUShader *shader = GPU_shader_create_from_info(info);

    /* NOTE: Some drivers / GPUs allow more active samplers than the OpenGL limit.
     * We need to make sure to count active samplers to avoid undefined behavior. */
    if (!gpu_pass_shader_validate(pass, shader)) {
      success = false;
      if (shader != nullptr) {
        fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
        GPU_shader_free(shader);
        shader = nullptr;
      }
    }
    pass->shader = shader;
    pass->compiled = true;
  }
  return success;
}

GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
  return pass->shader;
}

static void gpu_pass_free(GPUPass *pass)
{
  BLI_assert(pass->refcount == 0);
  if (pass->shader) {
    GPU_shader_free(pass->shader);
  }
  delete pass->create_info;
  MEM_freeN(pass);
}

void GPU_pass_release(GPUPass *pass)
{
  BLI_spin_lock(&pass_cache_spin);
  BLI_assert(pass->refcount > 0);
  pass->refcount--;
  /* Un-cached passes will not be filtered by garbage collection, so release here. */
  if (pass->refcount == 0 && !pass->cached) {
    gpu_pass_free(pass);
  }
  BLI_spin_unlock(&pass_cache_spin);
}

void GPU_pass_cache_garbage_collect()
{
  const int shadercollectrate = 60; /* hardcoded for now. */
  int ctime = int(PIL_check_seconds_timer());
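
  /* Passes still referenced just get their timestamp refreshed; passes whose refcount has been
   * zero for more than shadercollectrate seconds are removed from the cache and freed. */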
  BLI_spin_lock(&pass_cache_spin);
  GPUPass *next, **prev_pass = &pass_cache;
  for (GPUPass *pass = pass_cache; pass; pass = next) {
    next = pass->next;
    if (pass->refcount > 0) {
      pass->gc_timestamp = ctime;
    }
    else if (pass->gc_timestamp + shadercollectrate < ctime) {
      /* Remove from list */
      *prev_pass = next;
      gpu_pass_free(pass);
      continue;
    }
    prev_pass = &pass->next;
  }
  BLI_spin_unlock(&pass_cache_spin);
}

void GPU_pass_cache_init()
{
  BLI_spin_init(&pass_cache_spin);
}

void GPU_pass_cache_free()
{
  BLI_spin_lock(&pass_cache_spin);
  while (pass_cache) {
    GPUPass *next = pass_cache->next;
    gpu_pass_free(pass_cache);
    pass_cache = next;
  }
  BLI_spin_unlock(&pass_cache_spin);

  BLI_spin_end(&pass_cache_spin);
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Module
 * \{ */

void gpu_codegen_init() {}

void gpu_codegen_exit()
{
  BKE_material_defaults_free_gpu();
  GPU_shader_free_builtin_shaders();
}

/** \} */