test2/source/blender/gpu/intern/gpu_material.cc

/* SPDX-FileCopyrightText: 2006 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*
* Manages materials, lights and textures.
*/
#include <cstring>
#include "MEM_guardedalloc.h"
#include "DNA_material_types.h"
#include "DNA_scene_types.h"
#include "DNA_world_types.h"
#include "BLI_listbase.h"
#include "BLI_math_vector.h"
#include "BLI_string.h"
#include "BLI_time.h"
#include "BLI_utildefines.h"
#include "BKE_main.hh"
#include "BKE_material.hh"
#include "BKE_node.hh"
#include "NOD_shader.h"
#include "GPU_material.hh"
#include "GPU_shader.hh"
#include "GPU_texture.hh"
#include "GPU_uniform_buffer.hh"
#include "DRW_engine.hh"
#include "gpu_codegen.hh"
#include "gpu_node_graph.hh"
#include "atomic_ops.h"
/* Structs */
#define MAX_COLOR_BAND 128
#define MAX_GPU_SKIES 8
/**
 * Whether the optimized variant of the GPUPass should be created asynchronously.
 * Usage of this depends on whether there are possible threading challenges of doing so.
 * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
 * compilation, though this option exists in case material graph optimization ever causes a
 * noticeable slowdown on the main thread.
 *
 * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously,
 * this flag controls whether shader node graph source serialization happens on the compilation
 * worker thread as well. */
#define ASYNC_OPTIMIZED_PASS_CREATION 0
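/* See GPU_material_from_nodetree() and GPU_material_optimize(): with this set to 1, only the
 * codegen callback and thunk are stored at creation time and the optimized pass source is
 * generated lazily on the compilation worker; with 0, the optimized pass source is generated
 * immediately on the creating thread and only its shader is compiled asynchronously. */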
struct GPUColorBandBuilder {
float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
int current_layer;
};
struct GPUSkyBuilder {
float pixels[MAX_GPU_SKIES][GPU_SKY_WIDTH * GPU_SKY_HEIGHT][4];
int current_layer;
};
struct GPUMaterial {
/* Contains #GPUShader and source code for deferred compilation.
* Can be shared between similar material (i.e: sharing same node-tree topology). */
GPUPass *pass;
/* Optimized GPUPass, conditionally compiled after the initial pass for optimal real-time
 * performance. This shader variant bakes dynamic uniform data as constants directly into the
 * shader source instead of reading them from the UBO. */
GPUPass *optimized_pass;
/* Optimization status.
* We also use this status to determine whether this material should be considered for
* optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
* `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
* `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
* performance to do so, based on the heuristic.
*/
eGPUMaterialOptimizationStatus optimization_status;
double creation_time;
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
struct DeferredOptimizePass {
GPUCodegenCallbackFn callback;
void *thunk;
} DeferredOptimizePass;
struct DeferredOptimizePass optimize_pass_info;
#endif
/** UBO for this material's parameters. */
GPUUniformBuf *ubo;
/** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
eGPUMaterialStatus status;
/** Some flags about the nodetree & the needed resources. */
eGPUMaterialFlag flag;
/** The engine type this material is compiled for. */
eGPUMaterialEngine engine;
/* Identify shader variations (shadow, probe, world background...) */
uint64_t uuid;
/* Number of generated functions. */
int generated_function_len;
/** Object type for attribute fetching. */
bool is_volume_shader;
/** DEPRECATED: Currently only used for deferred compilation. */
Scene *scene;
/** Source material, might be null. */
Material *ma;
/** 1D Texture array containing all color bands. */
GPUTexture *coba_tex;
/** Builder for coba_tex. */
GPUColorBandBuilder *coba_builder;
/** 2D Texture array containing all sky textures. */
GPUTexture *sky_tex;
/** Builder for sky_tex. */
GPUSkyBuilder *sky_builder;
/* Low level node graph(s). Also contains resources needed by the material. */
GPUNodeGraph graph;
/** Default material reference used for PSO cache warming. Default materials may perform
 * different operations, but the permutation will frequently share the same input PSO
 * descriptors. This enables asynchronous PSO compilation as part of the deferred compilation
 * pass, reducing runtime stuttering and improving responsiveness while materials compile. */
GPUMaterial *default_mat;
/** DEPRECATED: To remove. */
bool has_surface_output;
bool has_volume_output;
bool has_displacement_output;
uint32_t refcount;
bool do_batch_compilation;
#ifndef NDEBUG
char name[64];
#else
char name[16];
#endif
};
/* Functions */
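/* NOTE: The `*_layer_set` / `*_row_set` helpers below accumulate pixel rows into temporary
 * CPU-side builder structs and return the address of the texture pointer that will hold the
 * result. The actual array textures are only created later by gpu_material_ramp_texture_build()
 * and gpu_material_sky_texture_build(), once every color band / sky texture of the node graph
 * has been gathered into a single layer stack. */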
GPUTexture **gpu_material_sky_texture_layer_set(
GPUMaterial *mat, int width, int height, const float *pixels, float *row)
{
/* In order to put all sky textures into one 2D array texture,
* we need them to be the same size. */
BLI_assert(width == GPU_SKY_WIDTH);
BLI_assert(height == GPU_SKY_HEIGHT);
UNUSED_VARS_NDEBUG(width, height);
if (mat->sky_builder == nullptr) {
mat->sky_builder = static_cast<GPUSkyBuilder *>(
MEM_mallocN(sizeof(GPUSkyBuilder), "GPUSkyBuilder"));
mat->sky_builder->current_layer = 0;
}
int layer = mat->sky_builder->current_layer;
*row = float(layer);
if (*row == MAX_GPU_SKIES) {
printf("Too many sky textures in shader!\n");
}
else {
float *dst = (float *)mat->sky_builder->pixels[layer];
memcpy(dst, pixels, sizeof(float) * GPU_SKY_WIDTH * GPU_SKY_HEIGHT * 4);
mat->sky_builder->current_layer += 1;
}
return &mat->sky_tex;
}
GPUTexture **gpu_material_ramp_texture_row_set(GPUMaterial *mat,
int size,
const float *pixels,
float *r_row)
{
/* In order to put all the color-bands into one 1D array texture,
* we need them to be the same size. */
BLI_assert(size == CM_TABLE + 1);
UNUSED_VARS_NDEBUG(size);
if (mat->coba_builder == nullptr) {
mat->coba_builder = static_cast<GPUColorBandBuilder *>(
MEM_mallocN(sizeof(GPUColorBandBuilder), "GPUColorBandBuilder"));
mat->coba_builder->current_layer = 0;
}
int layer = mat->coba_builder->current_layer;
*r_row = float(layer);
if (*r_row == MAX_COLOR_BAND) {
printf("Too many color band in shader! Remove some Curve, Black Body or Color Ramp Node.\n");
}
else {
float *dst = (float *)mat->coba_builder->pixels[layer];
memcpy(dst, pixels, sizeof(float) * (CM_TABLE + 1) * 4);
mat->coba_builder->current_layer += 1;
}
return &mat->coba_tex;
}
static void gpu_material_ramp_texture_build(GPUMaterial *mat)
{
if (mat->coba_builder == nullptr) {
return;
}
GPUColorBandBuilder *builder = mat->coba_builder;
mat->coba_tex = GPU_texture_create_1d_array("mat_ramp",
CM_TABLE + 1,
builder->current_layer,
1,
GPU_RGBA16F,
GPU_TEXTURE_USAGE_SHADER_READ,
(float *)builder->pixels);
MEM_freeN(builder);
mat->coba_builder = nullptr;
}
static void gpu_material_sky_texture_build(GPUMaterial *mat)
{
if (mat->sky_builder == nullptr) {
return;
}
mat->sky_tex = GPU_texture_create_2d_array("mat_sky",
GPU_SKY_WIDTH,
GPU_SKY_HEIGHT,
mat->sky_builder->current_layer,
1,
GPU_RGBA32F,
GPU_TEXTURE_USAGE_SHADER_READ,
(float *)mat->sky_builder->pixels);
MEM_freeN(mat->sky_builder);
mat->sky_builder = nullptr;
}
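/* Decrement the reference count and free the material once it reaches zero. Pairs with
 * GPU_material_acquire()/GPU_material_release(); materials created through
 * GPU_material_from_nodetree() or GPU_material_from_callbacks() start with a refcount of 1. */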
void GPU_material_free_single(GPUMaterial *material)
{
bool do_free = atomic_sub_and_fetch_uint32(&material->refcount, 1) == 0;
if (!do_free) {
return;
}
gpu_node_graph_free(&material->graph);
if (material->optimized_pass != nullptr) {
GPU_pass_release(material->optimized_pass);
}
if (material->pass != nullptr) {
GPU_pass_release(material->pass);
}
if (material->ubo != nullptr) {
GPU_uniformbuf_free(material->ubo);
}
if (material->coba_builder != nullptr) {
MEM_freeN(material->coba_builder);
}
if (material->coba_tex != nullptr) {
GPU_texture_free(material->coba_tex);
}
if (material->sky_tex != nullptr) {
GPU_texture_free(material->sky_tex);
}
MEM_freeN(material);
}
void GPU_material_free(ListBase *gpumaterial)
{
LISTBASE_FOREACH (LinkData *, link, gpumaterial) {
GPUMaterial *material = static_cast<GPUMaterial *>(link->data);
DRW_deferred_shader_remove(material);
GPU_material_free_single(material);
}
BLI_freelistN(gpumaterial);
}
Scene *GPU_material_scene(GPUMaterial *material)
{
return material->scene;
}
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
/* If an optimized pass variant is available, and optimization is
* flagged as complete, we use this one instead. */
return ((GPU_material_optimization_status(material) == GPU_MAT_OPTIMIZATION_SUCCESS) &&
material->optimized_pass) ?
material->optimized_pass :
material->pass;
}
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
/* If an optimized material shader variant is available, and optimization is
* flagged as complete, we use this one instead. */
GPUShader *shader = ((GPU_material_optimization_status(material) ==
GPU_MAT_OPTIMIZATION_SUCCESS) &&
material->optimized_pass) ?
GPU_pass_shader_get(material->optimized_pass) :
nullptr;
return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : nullptr);
}
GPUShader *GPU_material_get_shader_base(GPUMaterial *material)
{
return (material->pass) ? GPU_pass_shader_get(material->pass) : nullptr;
}
const char *GPU_material_get_name(GPUMaterial *material)
{
return material->name;
}
Material *GPU_material_get_material(GPUMaterial *material)
{
return material->ma;
}
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material)
{
return material->ubo;
}
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs)
{
material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name);
}
ListBase GPU_material_attributes(const GPUMaterial *material)
{
return material->graph.attributes;
}
ListBase GPU_material_textures(GPUMaterial *material)
{
return material->graph.textures;
}
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material)
{
const GPUUniformAttrList *attrs = &material->graph.uniform_attrs;
return attrs->count > 0 ? attrs : nullptr;
}
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material)
{
const ListBase *attrs = &material->graph.layer_attrs;
return !BLI_listbase_is_empty(attrs) ? attrs : nullptr;
}
void GPU_material_output_surface(GPUMaterial *material, GPUNodeLink *link)
{
if (!material->graph.outlink_surface) {
material->graph.outlink_surface = link;
material->has_surface_output = true;
}
}
void GPU_material_output_volume(GPUMaterial *material, GPUNodeLink *link)
{
if (!material->graph.outlink_volume) {
material->graph.outlink_volume = link;
material->has_volume_output = true;
}
}
void GPU_material_output_displacement(GPUMaterial *material, GPUNodeLink *link)
{
if (!material->graph.outlink_displacement) {
material->graph.outlink_displacement = link;
material->has_displacement_output = true;
}
}
void GPU_material_output_thickness(GPUMaterial *material, GPUNodeLink *link)
{
if (!material->graph.outlink_thickness) {
material->graph.outlink_thickness = link;
}
}
void GPU_material_add_output_link_aov(GPUMaterial *material, GPUNodeLink *link, int hash)
{
GPUNodeGraphOutputLink *aov_link = static_cast<GPUNodeGraphOutputLink *>(
MEM_callocN(sizeof(GPUNodeGraphOutputLink), __func__));
aov_link->outlink = link;
aov_link->hash = hash;
BLI_addtail(&material->graph.outlink_aovs, aov_link);
}
void GPU_material_add_output_link_composite(GPUMaterial *material, GPUNodeLink *link)
{
GPUNodeGraphOutputLink *compositor_link = static_cast<GPUNodeGraphOutputLink *>(
MEM_callocN(sizeof(GPUNodeGraphOutputLink), __func__));
compositor_link->outlink = link;
BLI_addtail(&material->graph.outlink_compositor, compositor_link);
}
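/* Wrap the given link into its own generated function, forcing a cast to `return_type` first.
 * Returns the generated function name (e.g. "ntree_fn0") so callers can reference it from other
 * generated code. */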
char *GPU_material_split_sub_function(GPUMaterial *material,
eGPUType return_type,
GPUNodeLink **link)
{
/* Force cast to return type. */
switch (return_type) {
case GPU_FLOAT:
GPU_link(material, "set_value", *link, link);
break;
case GPU_VEC3:
GPU_link(material, "set_rgb", *link, link);
break;
case GPU_VEC4:
GPU_link(material, "set_rgba", *link, link);
break;
default:
BLI_assert(0);
break;
}
GPUNodeGraphFunctionLink *func_link = static_cast<GPUNodeGraphFunctionLink *>(
MEM_callocN(sizeof(GPUNodeGraphFunctionLink), __func__));
func_link->outlink = *link;
SNPRINTF(func_link->name, "ntree_fn%d", material->generated_function_len++);
BLI_addtail(&material->graph.material_functions, func_link);
return func_link->name;
}
GPUNodeGraph *gpu_material_node_graph(GPUMaterial *material)
{
return &material->graph;
}
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat)
{
return mat->status;
}
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
{
mat->status = status;
}
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
{
return mat->optimization_status;
}
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
{
mat->optimization_status = status;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
/* Reset creation timer to delay optimization pass. */
mat->creation_time = BLI_time_now_seconds();
}
}
bool GPU_material_optimization_ready(GPUMaterial *mat)
{
/* Timer threshold before optimizations will be queued.
* When materials are frequently being modified, optimization
* can incur CPU overhead from excessive compilation.
*
* As the optimization is entirely asynchronous, it is still beneficial
* to do this quickly to avoid build-up and improve runtime performance.
* The threshold just prevents compilations being queued frame after frame. */
const double optimization_time_threshold_s = 1.2;
return ((BLI_time_now_seconds() - mat->creation_time) >= optimization_time_threshold_s);
}
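/* Illustrative use of the optimization queries above (a sketch only; the actual scheduling is up
 * to the caller, e.g. the draw manager's deferred compilation):
 *
 *   if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_READY &&
 *       GPU_material_optimization_ready(mat)) {
 *     GPU_material_optimize(mat); // Compiles the constant-folded shader variant.
 *   }
 */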
void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material)
{
if (material != default_material) {
material->default_mat = default_material;
}
}
/* Code generation */
bool GPU_material_has_surface_output(GPUMaterial *mat)
{
return mat->has_surface_output;
}
bool GPU_material_has_volume_output(GPUMaterial *mat)
{
return mat->has_volume_output;
}
bool GPU_material_has_displacement_output(GPUMaterial *mat)
{
return mat->has_displacement_output;
}
void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag)
{
if ((flag & GPU_MATFLAG_GLOSSY) && (mat->flag & GPU_MATFLAG_GLOSSY)) {
/* Tag material using multiple glossy BSDF as using clear coat. */
mat->flag |= GPU_MATFLAG_COAT;
}
mat->flag |= flag;
}
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag)
{
return (mat->flag & flag) != 0;
}
eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat)
{
return mat->flag;
}
bool GPU_material_recalc_flag_get(GPUMaterial *mat)
{
/* NOTE: Consumes the flags. */
bool updated = (mat->flag & GPU_MATFLAG_UPDATED) != 0;
mat->flag &= ~GPU_MATFLAG_UPDATED;
return updated;
}
uint64_t GPU_material_uuid_get(GPUMaterial *mat)
{
return mat->uuid;
}
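/* Create (or reuse) a GPUMaterial for the given node tree. The `gpumaterials` list is searched
 * first for an entry matching `shader_uuid` and `engine`; otherwise a new material is allocated,
 * the localized node tree is converted into a GPUNodeGraph, ramp/sky textures are built and a
 * GPUPass is generated (or taken from `pass_replacement_cb` when a default pass can be reused).
 * The material is appended to `gpumaterials` even when pass generation fails, so compilation is
 * not retried over and over. */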
GPUMaterial *GPU_material_from_nodetree(Scene *scene,
Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool is_volume_shader,
bool is_lookdev,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb)
{
/* Search if this material is not already compiled. */
LISTBASE_FOREACH (LinkData *, link, gpumaterials) {
GPUMaterial *mat = (GPUMaterial *)link->data;
if (mat->uuid == shader_uuid && mat->engine == engine) {
return mat;
}
}
GPUMaterial *mat = static_cast<GPUMaterial *>(MEM_callocN(sizeof(GPUMaterial), "GPUMaterial"));
mat->ma = ma;
mat->scene = scene;
mat->engine = engine;
mat->uuid = shader_uuid;
mat->flag = GPU_MATFLAG_UPDATED;
mat->status = GPU_MAT_CREATED;
mat->default_mat = nullptr;
mat->is_volume_shader = is_volume_shader;
mat->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
mat->refcount = 1;
STRNCPY(mat->name, name);
if (is_lookdev) {
mat->flag |= GPU_MATFLAG_LOOKDEV_HACK;
}
/* Localize tree to create links for reroute and mute. */
bNodeTree *localtree = blender::bke::node_tree_localize(ntree, nullptr);
ntreeGPUMaterialNodes(localtree, mat);
gpu_material_ramp_texture_build(mat);
gpu_material_sky_texture_build(mat);
/* Use default material pass when possible. */
if (GPUPass *default_pass = pass_replacement_cb ? pass_replacement_cb(thunk, mat) : nullptr) {
mat->pass = default_pass;
GPU_pass_acquire(mat->pass);
/** WORKAROUND:
* The node tree code is never executed in default replaced passes,
* but the GPU validation will still complain if the node tree UBO is not bound.
* So we create a dummy UBO with (at least) the size of the default material one (192 bytes).
* We allocate 256 bytes to leave some room for future changes. */
mat->ubo = GPU_uniformbuf_create_ex(256, nullptr, "Dummy UBO");
}
else {
/* Create source code and search pass cache for an already compiled version. */
mat->pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, false);
}
if (mat->pass == nullptr) {
/* We had a cache hit and the shader has already failed to compile. */
mat->status = GPU_MAT_FAILED;
gpu_node_graph_free(&mat->graph);
}
else {
/* Determine whether we should generate an optimized variant of the graph.
* Heuristic is based on complexity of default material pass and shader node graph. */
if (GPU_pass_should_optimize(mat->pass)) {
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
}
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != nullptr) {
/* We had a cache hit and the shader is already compiled. */
mat->status = GPU_MAT_SUCCESS;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
gpu_node_graph_free_nodes(&mat->graph);
}
}
/* Generate optimized pass. */
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
mat->optimized_pass = nullptr;
mat->optimize_pass_info.callback = callback;
mat->optimize_pass_info.thunk = thunk;
#else
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, true);
if (mat->optimized_pass == nullptr) {
/* Failed to create optimized pass. */
gpu_node_graph_free_nodes(&mat->graph);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
else {
GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
if (optimized_sh != nullptr) {
/* Optimized shader already available. */
gpu_node_graph_free_nodes(&mat->graph);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
}
}
#endif
}
}
/* Only free after GPU_pass_shader_get, since the GPUUniformBuf reads data from the local tree. */
blender::bke::node_tree_free_local_tree(localtree);
BLI_assert(!localtree->id.py_instance); /* Or call #BKE_libblock_free_data_py. */
MEM_freeN(localtree);
/* Note that even if building the shader fails in some way, we still keep
* it to avoid trying to compile again and again, and simply do not use
* the actual shader on drawing. */
LinkData *link = static_cast<LinkData *>(MEM_callocN(sizeof(LinkData), "GPUMaterialLink"));
link->data = mat;
BLI_addtail(gpumaterials, link);
return mat;
}
void GPU_material_acquire(GPUMaterial *mat)
{
atomic_add_and_fetch_uint32(&mat->refcount, 1);
}
void GPU_material_release(GPUMaterial *mat)
{
GPU_material_free_single(mat);
}
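/* Shared post-compilation step for GPU_material_compile() and the asynchronous path below. On
 * success this warms the PSO cache using the default material's shader as parent (when one is
 * available) and marks the material GPU_MAT_SUCCESS; on failure it releases the pass and frees
 * the node graph. */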
static void gpu_material_finalize(GPUMaterial *mat, bool success)
{
mat->flag |= GPU_MATFLAG_UPDATED;
if (success) {
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != nullptr) {
/** Perform asynchronous Render Pipeline State Object (PSO) compilation.
*
* Warm PSO cache within asynchronous compilation thread using default material as source.
* GPU_shader_warm_cache(..) performs the API-specific PSO compilation using the assigned
* parent shader's cached PSO descriptors as an input.
*
* This is only applied if the given material has a specified default reference
* material available, and the default material is already compiled.
*
* As PSOs do not always match for default shaders, we limit warming for PSO
* configurations to ensure compile time remains fast, as these first
* entries will be the most commonly used PSOs. As not all PSOs are necessarily
* required immediately, this limit should remain low (1-3 at most). */
if (!ELEM(mat->default_mat, nullptr, mat)) {
if (mat->default_mat->pass != nullptr) {
GPUShader *parent_sh = GPU_pass_shader_get(mat->default_mat->pass);
if (parent_sh) {
/* Skip warming if cached pass is identical to the default material. */
if (mat->default_mat->pass != mat->pass && parent_sh != sh) {
GPU_shader_set_parent(sh, parent_sh);
GPU_shader_warm_cache(sh, 1);
}
}
}
}
/* Flag success. */
mat->status = GPU_MAT_SUCCESS;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
/* Only free node graph nodes if not required by secondary optimization pass. */
gpu_node_graph_free_nodes(&mat->graph);
}
}
else {
mat->status = GPU_MAT_FAILED;
}
}
else {
mat->status = GPU_MAT_FAILED;
GPU_pass_release(mat->pass);
mat->pass = nullptr;
gpu_node_graph_free(&mat->graph);
}
}
void GPU_material_compile(GPUMaterial *mat)
{
bool success;
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->pass, mat->name);
#else
success = GPU_pass_compile(mat->pass, __func__);
#endif
gpu_material_finalize(mat, success);
}
void GPU_material_async_compile(GPUMaterial *mat)
{
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
#ifndef NDEBUG
const char *name = mat->name;
#else
const char *name = __func__;
#endif
GPU_pass_begin_async_compilation(mat->pass, name);
}
bool GPU_material_async_try_finalize(GPUMaterial *mat)
{
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
if (GPU_pass_async_compilation_try_finalize(mat->pass)) {
gpu_material_finalize(mat, GPU_pass_shader_get(mat->pass) != nullptr);
return true;
}
return false;
}
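/* Illustrative use of the two asynchronous entry points above (a sketch only; real scheduling is
 * up to the caller, e.g. a compilation worker polling once per frame):
 *
 *   GPU_material_async_compile(mat);
 *   // ...later, poll until the shader is ready:
 *   if (GPU_material_async_try_finalize(mat)) {
 *     // Status is now either GPU_MAT_SUCCESS or GPU_MAT_FAILED.
 *   }
 */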
void GPU_material_optimize(GPUMaterial *mat)
{
/* If shader is flagged for skipping optimization or has already been successfully
* optimized, skip. */
if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
return;
}
/* If original shader has not been fully compiled, we are not
* ready to perform optimization. */
if (mat->status != GPU_MAT_SUCCESS) {
/* Reset optimization status. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
return;
}
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
/* If the optimized pass is not yet valid, generate it first.
 * NOTE(Threading): Need to verify whether GPU_generate_pass can cause side-effects, especially
 * when used with "thunk". So far this appears to work, and deferring optimized pass creation is
 * preferable, as these passes do not benefit from caching due to their baked constants. However,
 * this could be a concern in certain cases. */
if (!mat->optimized_pass) {
mat->optimized_pass = GPU_generate_pass(mat,
&mat->graph,
mat->engine,
mat->optimize_pass_info.callback,
mat->optimize_pass_info.thunk,
true);
BLI_assert(mat->optimized_pass);
}
#else
if (!mat->optimized_pass) {
/* Optimized pass has not been created, skip future optimization attempts. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
return;
}
#endif
bool success;
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->optimized_pass, mat->name);
#else
success = GPU_pass_compile(mat->optimized_pass, __func__);
#endif
if (success) {
GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
if (sh != nullptr) {
/** Perform asynchronous Render Pipeline State Object (PSO) compilation.
*
* Warm PSO cache within asynchronous compilation thread for optimized materials.
* This setup assigns the original unoptimized shader as a "parent" shader
* for the optimized version. This then allows the associated GPU backend to
* compile PSOs within this asynchronous pass, using the identical PSO descriptors of the
* parent shader.
*
* This eliminates all run-time stuttering associated with material optimization and ensures
* realtime material editing and animation remains seamless, while retaining optimal realtime
* performance. */
GPUShader *parent_sh = GPU_pass_shader_get(mat->pass);
if (parent_sh) {
GPU_shader_set_parent(sh, parent_sh);
GPU_shader_warm_cache(sh, -1);
}
/* Mark as complete. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
}
else {
/* Optimized pass failed to compile. Disable any future optimization attempts. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
}
else {
/* Optimization pass generation failed. Disable future attempts to optimize. */
GPU_pass_release(mat->optimized_pass);
mat->optimized_pass = nullptr;
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
/* Release node graph as no longer needed. */
gpu_node_graph_free_nodes(&mat->graph);
}
void GPU_materials_free(Main *bmain)
{
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
GPU_material_free(&ma->gpumaterial);
}
LISTBASE_FOREACH (World *, wo, &bmain->worlds) {
GPU_material_free(&wo->gpumaterial);
}
BKE_material_defaults_free_gpu();
}
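/* Create a GPUMaterial whose node graph is built procedurally through `construct_function_cb`
 * instead of from a bNodeTree. Optimization is always skipped for these materials, and the
 * returned status is GPU_MAT_FAILED (pass generation failed), GPU_MAT_SUCCESS (shader found in
 * the pass cache) or GPU_MAT_CREATED (still needs to be compiled). */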
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk)
{
/* Allocate a new material and its material graph, and initialize its reference count. */
GPUMaterial *material = static_cast<GPUMaterial *>(
MEM_callocN(sizeof(GPUMaterial), "GPUMaterial"));
material->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
material->refcount = 1;
material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
material->optimized_pass = nullptr;
material->default_mat = nullptr;
material->engine = engine;
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
/* Create and initialize the texture storing color bands used by Ramp and Curve nodes. */
gpu_material_ramp_texture_build(material);
/* Lookup an existing pass in the cache or generate a new one. */
material->pass = GPU_generate_pass(
material, &material->graph, material->engine, generate_code_function_cb, thunk, false);
material->optimized_pass = nullptr;
/* The pass already exists in the pass cache but its shader already failed to compile. */
if (material->pass == nullptr) {
material->status = GPU_MAT_FAILED;
gpu_node_graph_free(&material->graph);
return material;
}
/* The pass already exists in the pass cache and its shader is already compiled. */
GPUShader *shader = GPU_pass_shader_get(material->pass);
if (shader != nullptr) {
material->status = GPU_MAT_SUCCESS;
if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
/* Only free node graph if not required by secondary optimization pass. */
gpu_node_graph_free_nodes(&material->graph);
}
return material;
}
/* The material was created successfully but still needs to be compiled. */
material->status = GPU_MAT_CREATED;
return material;
}