Files
test2/source/blender/draw/intern/draw_manager_c.hh
Clément Foucault 62826931b0 GPU: Move more linting and processing of GLSL to compile time
The goal is to reduce the startup time cost of
all of this parsing and string replacement.

All comments are now stripped at compile time.
This comment check added noticeable slowdown at
startup in debug builds and during preprocessing.

Put all metadata between start and end tokens.
Use very simple parsing using `StringRef` and
hash all identifiers.

Move all the complexity to the preprocessor that
massages the metadata into the well-formed input
expected by the runtime parser.

All identifiers are compile time hashed so that no string
comparison is made at runtime.

Speed up the source loading:
- from 10ms to 1.6ms (6.25x speedup) in release
- from 194ms to 6ms (32.3x speedup) in debug

Follow up #129009

Pull Request: https://projects.blender.org/blender/blender/pulls/128927
2024-10-15 19:47:30 +02:00

759 lines
20 KiB
C++

/* SPDX-FileCopyrightText: 2016 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup draw
*/
/* Private functions / structs of the draw manager */
#pragma once
#include "DRW_engine.hh"
#include "DRW_render.hh"
#include "BLI_assert.h"
#include "BLI_linklist.h"
#include "BLI_memblock.h"
#include "BLI_task.h"
#include "BLI_threads.h"
#include "GPU_batch.hh"
#include "GPU_context.hh"
#include "GPU_drawlist.hh"
#include "GPU_framebuffer.hh"
#include "GPU_shader.hh"
#include "GPU_uniform_buffer.hh"
#include "GPU_viewport.hh"
#include "draw_instance_data.hh"
#include "draw_shader_shared.hh"
struct DRWDebugModule;
struct DRWTexturePool;
struct DRWUniformChunk;
struct DupliObject;
struct Object;
namespace blender::draw {
struct CurvesUniformBufPool;
struct DRW_Attributes;
struct DRW_MeshCDMask;
} // namespace blender::draw
struct GPUMaterial;
/** Use draw manager to call GPU_select, see: #DRW_draw_select_loop */
/* In this header, defining it adds the `select_id` member to #DRWManager. */
#define USE_GPU_SELECT
/** Use draw-call batching using instanced rendering. */
#define USE_BATCHING 1
/* Disabled debug switch; uncomment to define it.
 * NOTE(review): nothing in this header tests it, its effect lives elsewhere. */
// #define DRW_DEBUG_CULLING
/* NOTE(review): not referenced in this header; presumably toggles keeping uniform names
 * around for debugging, confirm against the users of this macro. */
#define DRW_DEBUG_USE_UNIFORM_NAME 0
/* NOTE(review): not referenced in this header; presumably a name buffer length in
 * characters, confirm against the users of this macro. */
#define DRW_UNIFORM_BUFFER_NAME 64
/* -------------------------------------------------------------------- */
/** \name Profiling
* \{ */
#define USE_PROFILE
#ifdef USE_PROFILE
#  include "BLI_time.h"

/** Weight given to the newest sample by #PROFILE_END_UPDATE (exponential average). */
#  define PROFILE_TIMER_FALLOFF 0.04

/**
 * Declare a `double` named `time_start` initialized with `BLI_time_now_seconds()`.
 * The trailing `((void)0)` makes the macro require a terminating semicolon.
 */
#  define PROFILE_START(time_start) \
    double time_start = BLI_time_now_seconds(); \
    ((void)0)

/**
 * Add the time elapsed since `time_start`, scaled by 1e3, to `time_accum`
 * (milliseconds, assuming `BLI_time_now_seconds()` returns seconds).
 */
#  define PROFILE_END_ACCUM(time_accum, time_start) \
    { \
      time_accum += (BLI_time_now_seconds() - time_start) * 1e3; \
    } \
    ((void)0)

/**
 * Exponential average: blend the time elapsed since `time_start` (scaled by 1e3)
 * into `time_update`, weighting the new sample by #PROFILE_TIMER_FALLOFF.
 */
#  define PROFILE_END_UPDATE(time_update, time_start) \
    { \
      double _time_delta = (BLI_time_now_seconds() - time_start) * 1e3; \
      time_update = (time_update * (1.0 - PROFILE_TIMER_FALLOFF)) + \
                    (_time_delta * PROFILE_TIMER_FALLOFF); \
    } \
    ((void)0)

#else /* USE_PROFILE */

/* Profiling disabled: every macro expands to a no-op expression statement.
 * The arguments are not evaluated, so the timing variables need not exist. */
#  define PROFILE_START(time_start) ((void)0)
#  define PROFILE_END_ACCUM(time_accum, time_start) ((void)0)
#  define PROFILE_END_UPDATE(time_update, time_start) ((void)0)

#endif /* USE_PROFILE */
/** \} */
/* -------------------------------------------------------------------- */
/** \name Data Structure
*
* Data structure for registered draw engines that can store draw manager
* specific data.
* \{ */
/** List node describing one draw engine registered with the draw manager. */
typedef struct DRWRegisteredDrawEngine {
/* Untyped previous/next links; the inline comment names the intended pointee type. */
void /*DRWRegisteredDrawEngine*/ *next, *prev;
/* Engine type this entry stands for. */
DrawEngineType *draw_engine;
/** Index of the type in the lists. Index is used for dupli data. */
int index;
} DRWRegisteredDrawEngine;
/**
* Data structure containing all drawcalls organized by passes and materials.
* DRWPass > DRWShadingGroup > DRWCall > DRWCallState
* > DRWUniform
*/
/* Culling data kept per resource.
 * NOTE(review): the code that fills and tests it is not part of this header. */
struct DRWCullingState {
/* Bit-mask of culling results.
 * NOTE(review): presumably one bit per view, cf. MAX_CULLED_VIEWS (32), confirm. */
uint32_t mask;
/* Culling: Using Bounding Sphere for now for faster culling.
 * Not ideal for planes. Could be extended. */
BoundSphere bsphere;
/* Grrr only used by EEVEE. */
void *user_data;
};
/* Minimum max UBO size is 64KiB. We take the largest
 * UBO struct and alloc the max number.
 * `((1 << 16) / sizeof(DRWObjectMatrix)) = 512`
 * (#DRWObjectMatrix holds two 4x4 float matrices).
 * 512 entries is also exactly what the 9-bit resource id of #DRWResourceHandle can address.
 * Keep in sync with `common_view_lib.glsl`. */
#define DRW_RESOURCE_CHUNK_LEN 512
/**
* Identifier used to sort similar drawcalls together.
* Also used to reference elements inside memory blocks.
*
* From MSB to LSB
* 1 bit for negative scale.
* 22 bits for chunk id.
* 9 bits for resource id inside the chunk. (can go up to 511)
* |-|----------------------|---------|
*
* Use manual bit-shift and mask instead of bit-fields to avoid
* compiler dependent behavior that would mess the ordering of
* the members thus changing the sorting order.
*/
typedef uint32_t DRWResourceHandle;

/** Return 1 when the negative-scale flag (most significant bit) is set, 0 otherwise. */
BLI_INLINE uint32_t DRW_handle_negative_scale_get(const DRWResourceHandle *handle)
{
  const uint32_t flag = *handle >> 31;
  return flag;
}

/** Return the 22-bit chunk index stored between the flag bit and the resource id. */
BLI_INLINE uint32_t DRW_handle_chunk_get(const DRWResourceHandle *handle)
{
  const uint32_t without_id = *handle >> 9;
  return without_id & 0x003FFFFFu;
}

/** Return the 9-bit resource id (0..511) stored in the lowest bits. */
BLI_INLINE uint32_t DRW_handle_id_get(const DRWResourceHandle *handle)
{
  const uint32_t id_mask = (1u << 9) - 1u;
  return *handle & id_mask;
}

/** Step to the next handle value; an id of 511 carries over into the chunk index. */
BLI_INLINE void DRW_handle_increment(DRWResourceHandle *handle)
{
  *handle = *handle + 1u;
}

/** Set the negative-scale flag, leaving the chunk index and resource id untouched. */
BLI_INLINE void DRW_handle_negative_scale_enable(DRWResourceHandle *handle)
{
  *handle = *handle | (1u << 31);
}
/**
 * Look up the memblock element addressed by `handle`.
 *
 * The chunk index and the resource id are extracted from the handle and passed on to
 * #BLI_memblock_elem_get; the negative-scale flag is not part of either.
 */
BLI_INLINE void *DRW_memblock_elem_from_handle(BLI_memblock *memblock,
                                               const DRWResourceHandle *handle)
{
  const int chunk_index = DRW_handle_chunk_get(handle);
  const int elem_index = DRW_handle_id_get(handle);
  return BLI_memblock_elem_get(memblock, chunk_index, elem_index);
}
/* Per-object matrices: two 4x4 float matrices, i.e. 128 bytes per entry. */
struct DRWObjectMatrix {
float model[4][4];
float modelinverse[4][4];
};
/* Per-object auxiliary data. Every member is a float; the total size is 16 floats. */
struct DRWObjectInfos {
float orcotexfac[2][4];
float ob_color[4];
float ob_index;
float pad; /*UNUSED*/
float ob_random;
float ob_flag; /* Sign is negative scaling. */
};
/* Both structs are checked against a 16 byte granularity at compile time. */
BLI_STATIC_ASSERT_ALIGN(DRWObjectMatrix, 16)
BLI_STATIC_ASSERT_ALIGN(DRWObjectInfos, 16)
typedef enum {
/* Draw Commands */
DRW_CMD_DRAW = 0, /* Only sortable type. Must be 0. */
DRW_CMD_DRAW_RANGE = 1,
DRW_CMD_DRAW_INSTANCE = 2,
DRW_CMD_DRAW_INSTANCE_RANGE = 3,
DRW_CMD_DRAW_PROCEDURAL = 4,
DRW_CMD_DRAW_INDIRECT = 5,
/* Compute Commands. */
DRW_CMD_COMPUTE = 8,
DRW_CMD_COMPUTE_REF = 9,
DRW_CMD_COMPUTE_INDIRECT = 10,
/* Other Commands */
DRW_CMD_BARRIER = 11,
DRW_CMD_CLEAR = 12,
DRW_CMD_DRWSTATE = 13,
DRW_CMD_STENCIL = 14,
DRW_CMD_SELECTID = 15,
/* Needs to fit in 4bits */
} eDRWCommandType;
#define DRW_MAX_DRAW_CMD_TYPE DRW_CMD_DRAW_INDIRECT
/* Payload structs for the draw command types. Each one starts with the batch to draw
 * followed by the resource handle of the object. */

/* Payload stored in #DRWCommand::draw. */
struct DRWCommandDraw {
blender::gpu::Batch *batch;
DRWResourceHandle handle;
};
/* Assume DRWResourceHandle to be 0. */
/* Payload stored in #DRWCommand::range: a vertex sub-range of the batch. */
struct DRWCommandDrawRange {
blender::gpu::Batch *batch;
DRWResourceHandle handle;
uint vert_first;
uint vert_count;
};
/* Payload stored in #DRWCommand::instance. */
struct DRWCommandDrawInstance {
blender::gpu::Batch *batch;
DRWResourceHandle handle;
uint inst_count;
uint use_attrs; /* bool */
};
/* Payload stored in #DRWCommand::instance_range: a sub-range of instances. */
struct DRWCommandDrawInstanceRange {
blender::gpu::Batch *batch;
DRWResourceHandle handle;
uint inst_first;
uint inst_count;
};
/* Payload stored in #DRWCommand::draw_indirect: parameters come from a storage buffer. */
struct DRWCommandDrawIndirect {
blender::gpu::Batch *batch;
DRWResourceHandle handle;
GPUStorageBuf *indirect_buf;
};
/* Payload stored in #DRWCommand::compute: group counts stored by value. */
struct DRWCommandCompute {
int groups_x_len;
int groups_y_len;
int groups_z_len;
};
/* Payload stored in #DRWCommand::compute_ref: group counts read through a pointer.
 * NOTE(review): presumably points at 3 ints (x, y, z), confirm against the executor. */
struct DRWCommandComputeRef {
int *groups_ref;
};
/* Payload stored in #DRWCommand::compute_indirect: parameters come from a storage buffer. */
struct DRWCommandComputeIndirect {
GPUStorageBuf *indirect_buf;
};
/* Payload stored in #DRWCommand::barrier. */
struct DRWCommandBarrier {
eGPUBarrier type;
};
/* Payload stored in #DRWCommand::procedural: a batch drawn with an explicit vertex count. */
struct DRWCommandDrawProcedural {
blender::gpu::Batch *batch;
DRWResourceHandle handle;
uint vert_count;
};
/* Payload stored in #DRWCommand::state. */
struct DRWCommandSetMutableState {
/** State changes (or'd or and'd with the pass's state) */
DRWState enable;
DRWState disable;
};
/* Payload stored in #DRWCommand::stencil. */
struct DRWCommandSetStencil {
uint write_mask;
uint comp_mask;
uint ref;
};
/* Payload stored in #DRWCommand::select_id. */
struct DRWCommandSetSelectID {
blender::gpu::VertBuf *select_buf;
uint select_id;
};
/* Payload stored in #DRWCommand::clear. */
struct DRWCommandClear {
eGPUFrameBufferBits clear_channels;
/* NOTE(review): the channels are stored as `uchar`, so "[0..1]" presumably means an
 * 8-bit value normalized at use (0..255 mapped to 0..1), confirm against the executor. */
uchar r, g, b, a; /* [0..1] for each channels. Normalized. */
float depth; /* [0..1] for depth. Normalized. */
uchar stencil; /* Stencil value [0..255] */
};
/* One recorded command. The union itself carries no discriminator; the active member is
 * identified by an #eDRWCommandType stored separately (see `command_type` in the chunks). */
union DRWCommand {
DRWCommandDraw draw;
DRWCommandDrawRange range;
DRWCommandDrawInstance instance;
DRWCommandDrawInstanceRange instance_range;
DRWCommandDrawProcedural procedural;
DRWCommandDrawIndirect draw_indirect;
DRWCommandCompute compute;
DRWCommandComputeRef compute_ref;
DRWCommandComputeIndirect compute_indirect;
DRWCommandBarrier barrier;
DRWCommandSetMutableState state;
DRWCommandSetStencil stencil;
DRWCommandSetSelectID select_id;
DRWCommandClear clear;
};
/** Used for aggregating calls into #blender::gpu::VertBuf's. */
struct DRWCallBuffer {
/* Vertex buffer receiving the aggregated data. */
blender::gpu::VertBuf *buf;
/* Companion buffer for selection.
 * NOTE(review): presumably holds select ids and is only set while selecting, confirm. */
blender::gpu::VertBuf *buf_select;
/* NOTE(review): presumably the number of entries written so far, confirm. */
int count;
};
/** Used by #DRWUniform.type */
/* TODO(@jbakker): rename to DRW_RESOURCE/DRWResourceType. */
/* Enumerators are numbered implicitly from 0. The `_REF` variants pair with the
 * pointer-to-pointer members of the #DRWUniform union, the `_COPY` variants with its
 * inline `fvalue` / `ivalue` arrays. */
typedef enum {
DRW_UNIFORM_INT = 0,
DRW_UNIFORM_INT_COPY,
DRW_UNIFORM_FLOAT,
DRW_UNIFORM_FLOAT_COPY,
DRW_UNIFORM_TEXTURE,
DRW_UNIFORM_TEXTURE_REF,
DRW_UNIFORM_IMAGE,
DRW_UNIFORM_IMAGE_REF,
DRW_UNIFORM_BLOCK,
DRW_UNIFORM_BLOCK_REF,
DRW_UNIFORM_STORAGE_BLOCK,
DRW_UNIFORM_STORAGE_BLOCK_REF,
DRW_UNIFORM_TFEEDBACK_TARGET,
DRW_UNIFORM_VERTEX_BUFFER_AS_TEXTURE,
DRW_UNIFORM_VERTEX_BUFFER_AS_TEXTURE_REF,
DRW_UNIFORM_VERTEX_BUFFER_AS_STORAGE,
DRW_UNIFORM_VERTEX_BUFFER_AS_STORAGE_REF,
/** Per drawcall uniforms/UBO */
DRW_UNIFORM_BLOCK_OBMATS,
DRW_UNIFORM_BLOCK_OBINFOS,
DRW_UNIFORM_BLOCK_OBATTRS,
DRW_UNIFORM_BLOCK_VLATTRS,
DRW_UNIFORM_RESOURCE_CHUNK,
DRW_UNIFORM_RESOURCE_ID,
/** Legacy / Fallback */
DRW_UNIFORM_BASE_INSTANCE,
DRW_UNIFORM_MODEL_MATRIX,
DRW_UNIFORM_MODEL_MATRIX_INVERSE,
/* WARNING: set DRWUniform->type
 * bit length accordingly. */
/* (#DRWUniform::type is currently a `uint8_t`.) */
} DRWUniformType;
/* One uniform value or resource binding, stored in a #DRWUniformChunk.
 * `type` (a #DRWUniformType) tells which member of the anonymous union is meaningful. */
struct DRWUniform {
union {
/* For reference or array/vector types. */
const void *pvalue;
/* DRW_UNIFORM_TEXTURE */
struct {
/* Direct texture, or pointer to the texture pointer for the `_REF` type. */
union {
GPUTexture *texture;
GPUTexture **texture_ref;
};
GPUSamplerState sampler_state;
};
/* DRW_UNIFORM_BLOCK */
union {
GPUUniformBuf *block;
GPUUniformBuf **block_ref;
};
/* DRW_UNIFORM_STORAGE_BLOCK */
union {
GPUStorageBuf *ssbo;
GPUStorageBuf **ssbo_ref;
};
/* DRW_UNIFORM_VERTEX_BUFFER_AS_STORAGE */
union {
blender::gpu::VertBuf *vertbuf;
blender::gpu::VertBuf **vertbuf_ref;
};
/* DRW_UNIFORM_FLOAT_COPY */
float fvalue[4];
/* DRW_UNIFORM_INT_COPY */
int ivalue[4];
/* DRW_UNIFORM_BLOCK_OBATTRS */
const struct GPUUniformAttrList *uniform_attrs;
};
int location; /* Uniform location or binding point for textures and UBO's. */
uint8_t type; /* #DRWUniformType */
uint8_t length; /* Length of vector types. */
uint8_t arraysize; /* Array size of scalar/vector types. */
};
/* A shader together with its uniforms and the chunks of commands recorded for it.
 * Shading groups form a singly linked list owned by a #DRWPass (see `DRWPass::shgroups`). */
struct DRWShadingGroup {
DRWShadingGroup *next;
GPUShader *shader; /* Shader to bind */
DRWUniformChunk *uniforms; /* Uniforms pointers */
struct {
/* Chunks of draw calls. */
struct DRWCommandChunk *first, *last;
} cmd;
/* The two anonymous structs below share storage: the first is valid during cache
 * populate, the second replaces it afterwards when Z sorting is used. */
union {
/* This struct is used during cache populate. */
struct {
int objectinfo; /* Equal to 1 if the shader needs obinfos. */
DRWResourceHandle pass_handle; /* Memblock key to parent pass. */
/* Set of uniform attributes used by this shader. */
const struct GPUUniformAttrList *uniform_attrs;
};
/* This struct is used after cache populate if using the Z sorting.
 * It will not conflict with the above struct. */
struct {
float distance; /* Distance from camera. */
uint original_index; /* Original position inside the shgroup list. */
} z_sorting;
};
};
/* Size in bytes of #DRWPass::name. */
#define MAX_PASS_NAME 32
/* A pass: a list of shading groups plus the draw state and name attached to the pass. */
struct DRWPass {
/* Linked list */
struct {
DRWShadingGroup *first;
DRWShadingGroup *last;
} shgroups;
/* Draw the shgroups of this pass instead.
 * This avoids duplicating drawcalls/shgroups
 * for similar passes. */
DRWPass *original;
/* Link list of additional passes to render. */
DRWPass *next;
/* Handle of this pass; shading groups refer back to it through
 * `DRWShadingGroup::pass_handle`. */
DRWResourceHandle handle;
DRWState state;
char name[MAX_PASS_NAME];
};
/* NOTE(review): not referenced in this header; 32 matches the bit width of the `uint32_t`
 * culling masks (`DRWCullingState::mask`, `DRWView::culling_mask`), confirm the link. */
#define MAX_CULLED_VIEWS 32
/* A view: matrices, clip planes and the culling data derived from them. */
struct DRWView {
/**
 * These float4x4 (as well as the ViewMatrices) have alignment requirements in C++
 * (see math::MatBase) that isn't fulfilled in C. So they need to be manually aligned.
 * Since the DRWView are allocated using BLI_memblock, the chunks are given to be 16 bytes
 * aligned (equal to the alignment of float4x4). We then assert that the DRWView itself is 16
 * bytes aligned.
 */
float4x4 persmat;
float4x4 persinv;
ViewMatrices storage;
/** Parent view if this is a sub view. nullptr otherwise. */
DRWView *parent;
/* Storage for up to 6 clip planes; only the first `clip_planes_len` are active. */
float4 clip_planes[6];
/** Number of active clip planes. */
int clip_planes_len;
/** Does the culling result need to be updated? */
bool is_dirty;
/** Does facing need to be reversed? */
bool is_inverted;
/** Culling */
uint32_t culling_mask;
BoundBox frustum_corners;
BoundSphere frustum_bsphere;
float frustum_planes[6][4];
/** Custom visibility function. */
DRWCallVisibilityFn *visibility_fn;
/* NOTE(review): presumably forwarded to `visibility_fn`, confirm against the caller. */
void *user_data;
};
/* Needed to assert that alignment is the same in C++ and C. */
BLI_STATIC_ASSERT_ALIGN(DRWView, 16);
/** \} */
/* -------------------------------------------------------------------- */
/** \name Data Chunks
*
* In order to keep a cache friendly data structure,
* we allocate most of our little data into chunks of multiple item.
* Iteration, allocation and memory usage are better.
* We lose a bit of memory by allocating more than what we need
* but it's counterbalanced by not needing the linked-list pointers
* for each item.
* \{ */
/* A block of up to 10 #DRWUniform entries, chained into a singly linked list. */
struct DRWUniformChunk {
DRWUniformChunk *next; /* single-linked list */
/* NOTE(review): presumably capacity (`uniform_len`) and number of entries in use
 * (`uniform_used`), confirm against the allocation code. */
uint32_t uniform_len;
uint32_t uniform_used;
DRWUniform uniforms[10];
};
/* A block of up to 96 commands. The type of each command is stored separately from the
 * payload, packed 4 bits per command: 6 words * 16 nibbles = 96 types for `commands[96]`. */
struct DRWCommandChunk {
DRWCommandChunk *next;
/* NOTE(review): presumably capacity (`command_len`) and number of commands recorded
 * (`command_used`), confirm against the allocation code. */
uint32_t command_len;
uint32_t command_used;
/* 4bits for each command. */
uint64_t command_type[6];
/* -- 64 bytes aligned -- */
DRWCommand commands[96];
/* -- 64 bytes aligned -- */
};
/* Same leading members as #DRWCommandChunk but with room for 6 commands only.
 * `next` is typed as #DRWCommandChunk.
 * NOTE(review): presumably so both chunk sizes can share one list, confirm. */
struct DRWCommandSmallChunk {
DRWCommandChunk *next;
uint32_t command_len;
uint32_t command_used;
/* 4bits for each command. */
/* TODO: reduce size of command_type. */
uint64_t command_type[6];
DRWCommand commands[6];
};
/* Only true for 64-bit platforms. */
/* NOTE(review): `__LP64__` is not defined by every 64-bit toolchain (e.g. LLP64 targets),
 * in which case this check is silently skipped. */
#ifdef __LP64__
BLI_STATIC_ASSERT_ALIGN(DRWCommandChunk, 16);
#endif
/** \} */
/* -------------------------------------------------------------------- */
/** \name Memory Pools
* \{ */
/** Contains memory pools information. */
struct DRWData {
/** Instance data. */
DRWInstanceDataList *idatalist;
/** Memory-pools for draw-calls. */
/* One pool per kind of element. NOTE(review): the element type of each pool is inferred
 * from its name only (e.g. `passes` for #DRWPass), confirm against the allocation code. */
BLI_memblock *commands;
BLI_memblock *commands_small;
BLI_memblock *callbuffers;
BLI_memblock *obmats;
BLI_memblock *obinfos;
BLI_memblock *cullstates;
BLI_memblock *shgroups;
BLI_memblock *uniforms;
BLI_memblock *views;
BLI_memblock *passes;
BLI_memblock *images;
/* Arrays of uniform buffers.
 * NOTE(review): presumably `ubo_len` entries each, one per resource chunk, confirm. */
GPUUniformBuf **matrices_ubo;
GPUUniformBuf **obinfos_ubo;
GHash *obattrs_ubo_pool;
/* View-layer attribute ("vlattrs") bookkeeping. */
GHash *vlattrs_name_cache;
ListBase vlattrs_name_list;
LayerAttribute *vlattrs_buf;
GPUUniformBuf *vlattrs_ubo;
bool vlattrs_ubo_ready;
uint ubo_len;
/** Per draw-call volume object data. */
void *volume_grids_ubos; /* VolumeUniformBufPool */
/** List of smoke textures to free after drawing. */
ListBase smoke_textures;
/**
 * Texture pool to reuse temp texture across engines.
 * TODO(@fclem): The pool could be shared even between view-ports.
 */
DRWTexturePool *texture_pool;
/** Per stereo view data. Contains engine data and default frame-buffers. */
DRWViewData *view_data[2];
/** Per draw-call curves object data. */
blender::draw::CurvesUniformBufPool *curves_ubos;
};
/** \} */
/* -------------------------------------------------------------------- */
/** \name Draw Manager
* \{ */
/* Key type of `DRWManager::dupli_ghash`: an object paired with its object-data. */
struct DupliKey {
Object *ob;
ID *ob_data;
};
#define DST_MAX_SLOTS 64 /* Cannot be changed without modifying RST.bound_tex_slots */
#define MAX_CLIP_PLANES 6 /* GL_MAX_CLIP_PLANES is at least 6 */
/* NOTE(review): 256 lies just outside the [0..255] stencil value range, presumably used as
 * a "no stencil value set" sentinel, confirm against its users. */
#define STENCIL_UNDEFINED 256
/* NOTE(review): not referenced in this header; presumably the length passed when creating
 * `DRWManager::draw_list`, confirm. */
#define DRW_DRAWLIST_LEN 256
/* Global state of the draw manager (a single instance, #DST, is declared below).
 *
 * Member order matters: everything before `system_gpu_context` is cleared after use,
 * everything from that member onwards is kept (see the comment next to it). */
struct DRWManager {
/* TODO: clean up this struct a bit. */
/* Cache generation */
/* TODO(@fclem): Rename to data. */
DRWData *vmempool;
/** Active view data structure for one of the 2 stereo view. Not related to DRWView. */
DRWViewData *view_data_active;
/* State of the object being evaluated if already allocated. */
DRWResourceHandle ob_handle;
/** True if current DST.ob_state has its matching DRWObjectInfos init. */
bool ob_state_obinfo_init;
/** Handle of current object resource in object resource arrays (DRWObjectMatrices/Infos). */
DRWResourceHandle resource_handle;
/** Handle of next DRWPass to be allocated. */
DRWResourceHandle pass_handle;
/** Dupli object that corresponds to the current object. */
DupliObject *dupli_source;
/** Object that created the dupli-list the current object is part of. */
Object *dupli_parent;
/** Object referenced by the current dupli object. */
Object *dupli_origin;
/** Object-data referenced by the current dupli object. */
ID *dupli_origin_data;
/** Hash-map: #DupliKey -> void pointer for each enabled engine. */
GHash *dupli_ghash;
/** TODO(@fclem): try to remove usage of this. */
DRWInstanceData *object_instance_data[MAX_INSTANCE_DATA_SIZE];
/* Dupli data for the current dupli for each enabled engine. */
void **dupli_datas;
/* Rendering state */
GPUShader *shader;
blender::gpu::Batch *batch;
/* Managed by `DRW_state_set`, `DRW_state_reset` */
DRWState state;
DRWState state_lock;
/* Per viewport */
GPUViewport *viewport;
GPUFrameBuffer *default_framebuffer;
float size[2];
float inv_size[2];
float pixsize;
/* One-bit flags describing the kind of draw in progress. */
struct {
uint is_select : 1;
uint is_material_select : 1;
uint is_depth : 1;
uint is_image_render : 1;
uint is_scene_render : 1;
uint draw_background : 1;
uint draw_text : 1;
} options;
/* Current rendering context */
DRWContextState draw_ctx;
/* Convenience pointer to text_store owned by the viewport */
DRWTextStore **text_store_p;
bool buffer_finish_called; /* Avoid bad usage of DRW_render_instance_buffer_finish */
/** True, when drawing is in progress, see #DRW_draw_in_progress. */
bool in_progress;
DRWView *view_default;
DRWView *view_active;
DRWView *view_previous;
uint primary_view_num;
/* Only present when USE_GPU_SELECT is defined. */
#ifdef USE_GPU_SELECT
uint select_id;
#endif
TaskGraph *task_graph;
/* Contains list of objects that needs to be extracted from other objects. */
GSet *delayed_extraction;
/* ---------- Nothing after this point is cleared after use ----------- */
/* system_gpu_context serves as the offset for clearing only
 * the top portion of the struct so DO NOT MOVE IT! */
/** Unique ghost context used by the draw manager. */
void *system_gpu_context;
GPUContext *blender_gpu_context;
/** Mutex to lock the drw manager and avoid concurrent context usage. */
TicketMutex *system_gpu_context_mutex;
GPUDrawList *draw_list;
DRWDebugModule *debug;
};
extern DRWManager DST; /* TODO: get rid of this and allow multi-threaded rendering. */
/** \} */
/* -------------------------------------------------------------------- */
/** \name Functions
* \{ */
/* Private draw manager functions. Only the declarations are visible here; the notes below
 * that go beyond the signatures are marked as assumptions. */
void drw_texture_set_parameters(GPUTexture *tex, DRWTextureFlag flags);
void *drw_viewport_engine_data_ensure(void *engine_type);
void drw_state_set(DRWState state);
/* Debug drawing module, see #DRWDebugModule. */
void drw_debug_draw();
void drw_debug_init();
void drw_debug_module_free(DRWDebugModule *module);
GPUStorageBuf *drw_debug_gpu_draw_buf_get();
/* NOTE(review): presumably extracts the 4-bit type of command `index` from the packed
 * `command_type` words of a command chunk, confirm against the definition. */
eDRWCommandType command_type_get(const uint64_t *command_type_bits, int index);
void drw_batch_cache_validate(Object *ob);
void drw_batch_cache_generate_requested(Object *ob);
/**
 * \warning Only evaluated mesh data is handled by this delayed generation.
 */
void drw_batch_cache_generate_requested_delayed(Object *ob);
void drw_batch_cache_generate_requested_evaluated_mesh_or_curve(Object *ob);
void drw_resource_buffer_finish(DRWData *vmempool);
/* Procedural Drawing */
blender::gpu::Batch *drw_cache_procedural_points_get();
blender::gpu::Batch *drw_cache_procedural_lines_get();
blender::gpu::Batch *drw_cache_procedural_triangles_get();
blender::gpu::Batch *drw_cache_procedural_triangle_strips_get();
/* NOTE(review): `table` is presumably `DRWData::obattrs_ubo_pool`, confirm at the call site. */
void drw_uniform_attrs_pool_update(GHash *table,
const GPUUniformAttrList *key,
DRWResourceHandle *handle,
const Object *ob,
const Object *dupli_parent,
const DupliObject *dupli_source);
GPUUniformBuf *drw_ensure_layer_attribute_buffer();
/* Engine data attached to a viewport. */
double *drw_engine_data_cache_time_get(GPUViewport *viewport);
void *drw_engine_data_engine_data_create(GPUViewport *viewport, void *engine_type);
void *drw_engine_data_engine_data_get(GPUViewport *viewport, void *engine_handle);
bool drw_engine_data_engines_data_validate(GPUViewport *viewport, void **engine_handle_array);
void drw_engine_data_cache_release(GPUViewport *viewport);
void drw_engine_data_free(GPUViewport *viewport);
namespace blender::draw {
/* The `r_` prefixed parameters are outputs written through the pointers. */
void DRW_mesh_get_attributes(const Object &object,
const Mesh &mesh,
const GPUMaterial *const *gpumat_array,
int gpumat_array_len,
DRW_Attributes *r_attrs,
DRW_MeshCDMask *r_cd_needed);
} // namespace blender::draw
void DRW_manager_begin_sync();
void DRW_manager_end_sync();
/** \} */