EEVEE-Next: Shadow Rendering Refactor

Split shadow rendering per LOD per tilemap and improve
fragment shader invocation rate by using multi-viewport.

Also changes the layout of the atlas to be 4 x 4 x Layers.
This allows growing the atlas while keeping the content
and page indirection correct, but this isn't implemented
in this patch.

# First attempt

Shadow rendering using atomics proved to be less than ideal
and performance was not quite at an acceptable level.

The previous method had issues with atomic contention when
a lot of triangles would overlap, and had too many fragment shader
invocations with quite complex indirection rules and biases,
which made the technique costly.

The new implementation leverages multi-viewport and
layered rendering to effectively replace the need for atomics
and render directly to the shadow atlas. Using the well
supported extension, these are free on modern hardware and
do not need a geometry shader.

One view per tile is needed since we use the viewport index
and the layer index as a way to index a specific tile in the
array.

# Geometric Complexity Problem

The counterpart of this is that we need to draw one geometry
instance per tile, which is 32x32 times more instances (at most)
than with the previous method.

This means that we will have to find a way to mitigate this
geometry cost by either reducing the number of tiles per
tilemaps (in other words, making the system less memory efficient)
or splitting complex objects' geometry into smaller, more
cull friendly chunks (for example, like the sculpt PBVH nodes).
The latter seems to be a longer-term solution, as it requires
far more engineering time than we have right now.

# Update Lag Problem

This also means we can only update up to 64 tiles per redraw,
which is not enough even in the most basic cases. This leads
to missing or excessive shadowing when a light updates, until there
are no more updates and the shadow rendering can catch up.

One possible solution is to update the lower LODs first, waiting
until there are no more updates to render. This would avoid artifacts
during transforms (unless there are too many light updates
even for the lowest LOD, but that was also an issue for the
previous implementation). This could also help with the
geometric complexity.

# Solution

In the end, we decided to have one view per LOD. This limits
the complexity of the fragment shader (improves speed),
reduces the number of views per tilemap (fixes update lag),
and reduces the number of instances.
This also means we cannot render directly to the atlas anymore
and reverted to the atomic solution. Using the smallest
possible viewport, we ensure that there aren't too many fragment
shader invocations, which was one of the bottlenecks. This also
reduces the amount of geometry instances that pass the
clipping test.

Pull Request: https://projects.blender.org/blender/blender/pulls/110979
This commit is contained in:
Clément Foucault
2023-08-17 17:35:19 +02:00
committed by Clément Foucault
parent 3a3390fcdd
commit 672d25b02d
25 changed files with 666 additions and 445 deletions

View File

@@ -56,16 +56,20 @@
SHADOW_TILEMAP_LOD3_LEN + SHADOW_TILEMAP_LOD4_LEN + SHADOW_TILEMAP_LOD5_LEN)
#define SHADOW_PAGE_CLEAR_GROUP_SIZE 32
#define SHADOW_PAGE_RES 256
#define SHADOW_PAGE_LOD 8 /* LOG2(SHADOW_PAGE_RES) */
#define SHADOW_DEPTH_SCAN_GROUP_SIZE 8
#define SHADOW_AABB_TAG_GROUP_SIZE 64
#define SHADOW_MAX_TILEMAP 4096
#define SHADOW_MAX_TILE (SHADOW_MAX_TILEMAP * SHADOW_TILEDATA_PER_TILEMAP)
#define SHADOW_MAX_PAGE 4096
#define SHADOW_PAGE_PER_ROW 64
#define SHADOW_ATLAS_SLOT 5
#define SHADOW_BOUNDS_GROUP_SIZE 64
#define SHADOW_CLIPMAP_GROUP_SIZE 64
#define SHADOW_VIEW_MAX 64 /* Must match DRW_VIEW_MAX. */
#define SHADOW_RENDER_MAP_SIZE (SHADOW_VIEW_MAX * SHADOW_TILEMAP_LOD0_LEN)
#define SHADOW_ATOMIC 1
#define SHADOW_PAGE_PER_ROW 4
#define SHADOW_PAGE_PER_COL 4
#define SHADOW_PAGE_PER_LAYER (SHADOW_PAGE_PER_ROW * SHADOW_PAGE_PER_COL)
/* Ray-tracing. */
#define RAYTRACE_GROUP_SIZE 8
@@ -131,8 +135,6 @@
#define REFLECTION_PROBE_TEX_SLOT 8
#define VOLUME_SCATTERING_TEX_SLOT 9
#define VOLUME_TRANSMITTANCE_TEX_SLOT 10
/* Only during shadow rendering. */
#define SHADOW_RENDER_MAP_SLOT 4
/* Images. */
#define RBUFS_COLOR_SLOT 0
@@ -145,6 +147,8 @@
#define VOLUME_PROP_EXTINCTION_IMG_SLOT 1
#define VOLUME_PROP_EMISSION_IMG_SLOT 2
#define VOLUME_PROP_PHASE_IMG_SLOT 3
/* Only during shadow rendering. */
#define SHADOW_ATLAS_IMG_SLOT 4
/* Uniform Buffers. */
/* Slot 0 is GPU_NODE_TREE_UBO_SLOT. */
@@ -157,6 +161,7 @@
#define VOLUMES_INFO_BUF_SLOT 6
/* SLOT 6 is used by render shaders (Film, DoF and Motion Blur). Need to check if it should be
* assigned a different slot. */
/* TODO(fclem): This is above the limit of slot 6 for engines. Keep it lower by merging others. */
#define REFLECTION_PROBE_BUF_SLOT 7
/* Only during pre-pass. */
#define VELOCITY_CAMERA_PREV_BUF 3
@@ -169,14 +174,16 @@
#define LIGHT_ZBIN_BUF_SLOT 2
#define LIGHT_TILE_BUF_SLOT 3
#define IRRADIANCE_BRICK_BUF_SLOT 4
#define SAMPLING_BUF_SLOT 6
#define CRYPTOMATTE_BUF_SLOT 7
/* Only during surface capture. */
#define SURFEL_BUF_SLOT 4
/* Only during surface capture. */
#define CAPTURE_BUF_SLOT 5
/* Only during shadow rendering. */
#define SHADOW_RENDER_MAP_BUF_SLOT 3
#define SHADOW_PAGE_INFO_SLOT 4
#define SAMPLING_BUF_SLOT 6
#define CRYPTOMATTE_BUF_SLOT 7
#define SHADOW_VIEWPORT_INDEX_BUF_SLOT 5
/* Only during pre-pass. */
#define VELOCITY_OBJ_PREV_BUF_SLOT 0

View File

@@ -149,17 +149,14 @@ void WorldVolumePipeline::render(View &view)
void ShadowPipeline::sync()
{
surface_ps_.init();
/* TODO(fclem): Add state for rendering to empty framebuffer without depth test.
* For now this is only here for avoiding the rasterizer discard state. */
surface_ps_.state_set(DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_LESS);
surface_ps_.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);
surface_ps_.bind_texture(SHADOW_RENDER_MAP_SLOT, &inst_.shadows.render_map_tx_);
surface_ps_.bind_image(SHADOW_ATLAS_SLOT, &inst_.shadows.atlas_tx_);
surface_ps_.bind_image(SHADOW_ATLAS_IMG_SLOT, inst_.shadows.atlas_tx_);
surface_ps_.bind_ubo(CAMERA_BUF_SLOT, inst_.camera.ubo_get());
surface_ps_.bind_ssbo(SHADOW_RENDER_MAP_BUF_SLOT, &inst_.shadows.render_map_buf_);
surface_ps_.bind_ssbo(SHADOW_VIEWPORT_INDEX_BUF_SLOT, &inst_.shadows.viewport_index_buf_);
surface_ps_.bind_ssbo(SHADOW_PAGE_INFO_SLOT, &inst_.shadows.pages_infos_data_);
inst_.sampling.bind_resources(&surface_ps_);
surface_ps_.framebuffer_set(&inst_.shadows.render_fb_);
}
PassMain::Sub *ShadowPipeline::surface_material_add(GPUMaterial *gpumat)

View File

@@ -797,7 +797,9 @@ static inline int2 shadow_cascade_grid_offset(int2 base_offset, int level_relati
*/
struct ShadowTileMapData {
/** Cached, used for rendering. */
float4x4 viewmat, winmat;
float4x4 viewmat;
/** Precomputed matrix, not used for rendering but for tagging. */
float4x4 winmat;
/** Punctual : Corners of the frustum. (vec3 padded to vec4) */
float4 corners[4];
/** Integer offset of the center of the 16x16 tiles from the origin of the tile space. */
@@ -812,6 +814,16 @@ struct ShadowTileMapData {
int clip_data_index;
/** Bias LOD to tag for usage to lower the amount of tile used. */
float lod_bias;
int _pad0;
int _pad1;
int _pad2;
/** Near and far clip distances for punctual. */
float clip_near;
float clip_far;
/** Half of the tilemap size in world units. Used to compute directional window matrix. */
float half_size;
/** Offset in local space to the tilemap center in world units. Used for directional winmat. */
float2 center_offset;
};
BLI_STATIC_ASSERT_ALIGN(ShadowTileMapData, 16)
@@ -823,6 +835,7 @@ struct ShadowTileMapClip {
float clip_near_stored;
float clip_far_stored;
/** Near and far clip distances for directional. Float stored as int for atomic operations. */
/** NOTE: These are positive just like camera parameters. */
int clip_near;
int clip_far;
};
@@ -839,12 +852,10 @@ struct ShadowPagesInfoData {
uint page_cached_start;
/** Index of the last page in the buffer since the last defrag. */
uint page_cached_end;
/** Number of views to be rendered during the shadow update pass. */
int view_count;
/** Physical page size in pixel. Pages are all squares. */
int page_size;
int _pad0;
int _pad1;
int _pad2;
};
BLI_STATIC_ASSERT_ALIGN(ShadowPagesInfoData, 16)
@@ -854,13 +865,17 @@ struct ShadowStatistics {
int page_update_count;
int page_allocated_count;
int page_rendered_count;
int view_needed_count;
int _pad0;
int _pad1;
int _pad2;
};
BLI_STATIC_ASSERT_ALIGN(ShadowStatistics, 16)
/** Decoded tile data structure. */
struct ShadowTileData {
/** Page inside the virtual shadow map atlas. */
uint2 page;
uint3 page;
/** Page index inside pages_cached_buf. Only valid if `is_cached` is true. */
uint cache_index;
/** LOD pointed to LOD 0 tile page. (cube-map only). */
@@ -888,12 +903,29 @@ enum eShadowFlag : uint32_t {
SHADOW_IS_USED = (1u << 31u)
};
/**
 * Encode a 3D page coordinate into a single integer.
 * X starts at bit 0, Y at bit 2 and Z at bit 4.
 */
static inline uint shadow_page_pack(uint3 page)
{
/* NOTE: No masking on purpose. The input is trusted to be in valid range.
 * The invalid page uint3(-1) is sometimes encoded through here and has to come out as uint(-1),
 * which only holds if every bit of the components is kept. */
uint packed_xy = page.x | (page.y << 2u);
return packed_xy | (page.z << 4u);
}
/**
 * Decode a packed page integer back into its 3D page coordinate.
 * X is read from bit 0, Y from bit 2 and Z from bit 4 upward.
 */
static inline uint3 shadow_page_unpack(uint data)
{
/* Bit offsets are tweaked for SHADOW_PAGE_PER_ROW = 4. */
uint mask_x = uint(SHADOW_PAGE_PER_ROW - 1);
uint mask_y = uint(SHADOW_PAGE_PER_COL - 1);
uint3 page;
page.x = data & mask_x;
page.y = (data >> 2u) & mask_y;
/* All remaining upper bits belong to Z (left unmasked). */
page.z = data >> 4u;
return page;
}
static inline ShadowTileData shadow_tile_unpack(ShadowTileDataPacked data)
{
ShadowTileData tile;
/* Tweaked for SHADOW_PAGE_PER_ROW = 64. */
tile.page.x = data & 63u;
tile.page.y = (data >> 6u) & 63u;
/* Tweaked for SHADOW_MAX_PAGE = 4096. */
tile.page = shadow_page_unpack(data & uint(SHADOW_MAX_PAGE - 1));
/* -- 12 bits -- */
/* Tweaked for SHADOW_TILEMAP_LOD < 8. */
tile.lod = (data >> 12u) & 7u;
@@ -911,9 +943,7 @@ static inline ShadowTileData shadow_tile_unpack(ShadowTileDataPacked data)
static inline ShadowTileDataPacked shadow_tile_pack(ShadowTileData tile)
{
uint data;
data = (tile.page.x & 63u);
data |= (tile.page.y & 63u) << 6u;
uint data = shadow_page_pack(tile.page) & uint(SHADOW_MAX_PAGE - 1);
data |= (tile.lod & 7u) << 12u;
data |= (tile.cache_index & 4095u) << 15u;
data |= (tile.is_used ? uint(SHADOW_IS_USED) : 0);

View File

@@ -54,13 +54,13 @@ void ShadowTileMap::sync_orthographic(const float4x4 &object_mat_,
* inverse in this particular case. */
viewmat = math::transpose(object_mat);
float half_size = ShadowDirectional::coverage_get(level) / 2.0f;
float2 win_offset = float2(grid_offset) * tile_size;
half_size = ShadowDirectional::coverage_get(level) / 2.0f;
center_offset = float2(grid_offset) * tile_size;
orthographic_m4(winmat.ptr(),
-half_size + win_offset.x,
half_size + win_offset.x,
-half_size + win_offset.y,
half_size + win_offset.y,
-half_size + center_offset.x,
half_size + center_offset.x,
-half_size + center_offset.y,
half_size + center_offset.y,
/* Near/far is computed on GPU using casters bounds. */
-1.0,
1.0);
@@ -69,15 +69,15 @@ void ShadowTileMap::sync_orthographic(const float4x4 &object_mat_,
void ShadowTileMap::sync_cubeface(
const float4x4 &object_mat_, float near_, float far_, eCubeFace face, float lod_bias_)
{
if (projection_type != SHADOW_PROJECTION_CUBEFACE || (cubeface != face) || (near != near_) ||
(far != far_))
if (projection_type != SHADOW_PROJECTION_CUBEFACE || (cubeface != face) ||
(clip_near != near_) || (clip_far != far_))
{
set_dirty();
}
projection_type = SHADOW_PROJECTION_CUBEFACE;
cubeface = face;
near = near_;
far = far_;
clip_near = near_;
clip_far = far_;
lod_bias = lod_bias_;
grid_offset = int2(0);
@@ -86,11 +86,13 @@ void ShadowTileMap::sync_cubeface(
set_dirty();
}
perspective_m4(winmat.ptr(), -near, near, -near, near, near, far);
winmat = math::projection::perspective(
-clip_near, clip_near, -clip_near, clip_near, clip_near, clip_far);
viewmat = float4x4(shadow_face_mat[cubeface]) * math::invert(object_mat);
/* Update corners. */
float4x4 viewinv = object_mat;
float far = clip_far;
corners[0] = float4(viewinv.location(), 0.0f);
corners[1] = float4(math::transform_point(viewinv, float3(-far, -far, -far)), 0.0f);
corners[2] = float4(math::transform_point(viewinv, float3(far, -far, -far)), 0.0f);
@@ -605,8 +607,8 @@ void ShadowDirectional::end_sync(Light &light, const Camera &camera, float lod_b
}
light.tilemap_index = tilemap_pool.tilemaps_data.size();
light.clip_near = int(0xFF7FFFFFu ^ 0x7FFFFFFFu); /* floatBitsToOrderedInt(-FLT_MAX) */
light.clip_far = 0x7F7FFFFF; /* floatBitsToOrderedInt(FLT_MAX) */
light.clip_near = 0x7F7FFFFF; /* floatBitsToOrderedInt(FLT_MAX) */
light.clip_far = int(0xFF7FFFFFu ^ 0x7FFFFFFFu); /* floatBitsToOrderedInt(-FLT_MAX) */
if (directional_distribution_type_get(camera) == SHADOW_PROJECTION_CASCADE) {
cascade_tilemaps_distribution(light, camera);
@@ -644,8 +646,11 @@ void ShadowModule::init()
}
}
int pool_size = enabled_ ? scene.eevee.shadow_pool_size : 0;
shadow_page_len_ = clamp_i(pool_size * 4, SHADOW_PAGE_PER_ROW, SHADOW_MAX_PAGE);
/* Pool size is in MBytes. */
const size_t pool_byte_size = enabled_ ? scene.eevee.shadow_pool_size * square_i(1024) : 1;
const size_t page_byte_size = square_i(shadow_page_size_) * sizeof(int);
shadow_page_len_ = int(divide_ceil_ul(pool_byte_size, page_byte_size));
shadow_page_len_ = min_ii(shadow_page_len_, SHADOW_MAX_PAGE);
float simplify_shadows = 1.0f;
if (scene.r.mode & R_SIMPLIFY) {
@@ -654,18 +659,18 @@ void ShadowModule::init()
}
lod_bias_ = math::interpolate(float(SHADOW_TILEMAP_LOD), 0.0f, simplify_shadows);
int2 atlas_extent = shadow_page_size_ *
int2(SHADOW_PAGE_PER_ROW, shadow_page_len_ / SHADOW_PAGE_PER_ROW);
const int2 atlas_extent = shadow_page_size_ * int2(SHADOW_PAGE_PER_ROW);
const int atlas_layers = divide_ceil_u(shadow_page_len_, SHADOW_PAGE_PER_LAYER);
eGPUTextureUsage tex_usage = GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE;
if (atlas_tx_.ensure_2d(atlas_type, atlas_extent, tex_usage)) {
if (atlas_tx_.ensure_2d_array(atlas_type, atlas_extent, atlas_layers, tex_usage)) {
/* Global update. */
do_full_update = true;
}
/* Make allocation safe. Avoids crash later on. */
if (!atlas_tx_.is_valid()) {
atlas_tx_.ensure_2d(atlas_type, int2(1));
atlas_tx_.ensure_2d_array(atlas_type, int2(1), 1);
inst_.info = "Error: Could not allocate shadow atlas. Most likely out of GPU memory.";
}
@@ -689,11 +694,24 @@ void ShadowModule::init()
<< stats.page_used_count << " / " << shadow_page_len_ << ")\n";
inst_.info = ss.str();
}
if (stats.view_needed_count > SHADOW_VIEW_MAX && enabled_) {
std::stringstream ss;
ss << "Error: Too many shadow updates, some shadow might be incorrect.\n";
inst_.info = ss.str();
}
}
atlas_tx_.filter_mode(false);
render_map_tx_.ensure_mip_views();
/* Create different viewport to support different update region size. The most fitting viewport
* is then selected during the tilemap finalize stage in `viewport_select`. */
for (int i = 0; i < multi_viewports_.size(); i++) {
int size_in_tile = min_ii(1 << i, SHADOW_TILEMAP_RES);
multi_viewports_[i][0] = 0;
multi_viewports_[i][1] = 0;
multi_viewports_[i][2] = size_in_tile * shadow_page_size_;
multi_viewports_[i][3] = size_in_tile * shadow_page_size_;
}
}
void ShadowModule::begin_sync()
@@ -856,13 +874,19 @@ void ShadowModule::end_sync()
do_full_update = false;
/* Put all pages in the free heap. */
for (uint i : IndexRange(shadow_page_len_)) {
uint2 page = {i % SHADOW_PAGE_PER_ROW, i / SHADOW_PAGE_PER_ROW};
pages_free_data_[i] = page.x | (page.y << 16u);
uint3 page = {i % SHADOW_PAGE_PER_ROW,
(i / SHADOW_PAGE_PER_ROW) % SHADOW_PAGE_PER_COL,
i / SHADOW_PAGE_PER_LAYER};
pages_free_data_[i] = shadow_page_pack(page);
}
for (uint i : IndexRange(shadow_page_len_, SHADOW_MAX_PAGE - shadow_page_len_)) {
pages_free_data_[i] = 0xFFFFFFFFu;
}
pages_free_data_.push_update();
/* Clear tiles to not reference any page. */
tilemap_pool.tiles_data.clear_to_zero();
tilemap_pool.tilemaps_clip.clear_to_zero();
/* Clear cached page buffer. */
GPU_storagebuf_clear(pages_cached_data_, -1);
@@ -873,7 +897,6 @@ void ShadowModule::end_sync()
pages_infos_data_.page_cached_next = 0u;
pages_infos_data_.page_cached_start = 0u;
pages_infos_data_.page_cached_end = 0u;
pages_infos_data_.page_size = shadow_page_size_;
pages_infos_data_.push_update();
}
@@ -1012,15 +1035,11 @@ void ShadowModule::end_sync()
sub.bind_ssbo("view_infos_buf", &shadow_multi_view_.matrices_ubo_get());
sub.bind_ssbo("statistics_buf", statistics_buf_.current());
sub.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf_);
sub.bind_ssbo("clear_page_buf", clear_page_buf_);
sub.bind_ssbo("clear_list_buf", clear_list_buf_);
sub.bind_ssbo("render_map_buf", render_map_buf_);
sub.bind_ssbo("viewport_index_buf", viewport_index_buf_);
sub.bind_ssbo("pages_infos_buf", pages_infos_data_);
sub.bind_image("tilemaps_img", tilemap_pool.tilemap_tx);
sub.bind_image("render_map_lod0_img", render_map_tx_.mip_view(0));
sub.bind_image("render_map_lod1_img", render_map_tx_.mip_view(1));
sub.bind_image("render_map_lod2_img", render_map_tx_.mip_view(2));
sub.bind_image("render_map_lod3_img", render_map_tx_.mip_view(3));
sub.bind_image("render_map_lod4_img", render_map_tx_.mip_view(4));
sub.bind_image("render_map_lod5_img", render_map_tx_.mip_view(5));
sub.dispatch(int3(1, 1, tilemap_pool.tilemaps_data.size()));
sub.barrier(GPU_BARRIER_SHADER_STORAGE | GPU_BARRIER_UNIFORM | GPU_BARRIER_TEXTURE_FETCH |
GPU_BARRIER_SHADER_IMAGE_ACCESS);
@@ -1028,10 +1047,12 @@ void ShadowModule::end_sync()
{
/** Clear pages that need to be rendered. */
PassSimple::Sub &sub = pass.sub("RenderClear");
sub.framebuffer_set(&render_fb_);
sub.state_set(DRW_STATE_WRITE_DEPTH | DRW_STATE_DEPTH_ALWAYS);
sub.shader_set(inst_.shaders.static_shader_get(SHADOW_PAGE_CLEAR));
sub.bind_ssbo("pages_infos_buf", pages_infos_data_);
sub.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf_);
sub.bind_image("atlas_img", atlas_tx_);
sub.bind_ssbo("clear_list_buf", clear_list_buf_);
sub.bind_image("shadow_atlas_img", atlas_tx_);
sub.dispatch(clear_dispatch_buf_);
sub.barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS);
}
@@ -1133,6 +1154,9 @@ void ShadowModule::set_view(View &view)
usage_tag_fb.ensure(usage_tag_fb_resolution_);
render_fb_.ensure(int2(SHADOW_TILEMAP_RES * shadow_page_size_));
GPU_framebuffer_bind(render_fb_);
GPU_framebuffer_multi_viewports_set(render_fb_,
reinterpret_cast<int(*)[4]>(multi_viewports_.data()));
inst_.hiz_buffer.update();
@@ -1151,6 +1175,8 @@ void ShadowModule::set_view(View &view)
shadow_multi_view_.compute_procedural_bounds();
inst_.pipelines.shadow.render(shadow_multi_view_);
GPU_memory_barrier(GPU_BARRIER_SHADER_IMAGE_ACCESS | GPU_BARRIER_TEXTURE_FETCH);
}
DRW_stats_group_end();

View File

@@ -71,8 +71,6 @@ struct ShadowTileMap : public ShadowTileMapData {
eCubeFace cubeface = Z_NEG;
/** Cached, used for detecting updates. */
float4x4 object_mat;
/** Near and far clip distances. For clip-map, computed on the GPU using casters BBoxes. */
float near, far;
public:
ShadowTileMap(int tiles_index_)
@@ -209,9 +207,13 @@ class ShadowModule {
StorageVectorBuffer<uint, 128> curr_casters_ = {"CurrCasters"};
/** Indirect arguments for page clearing. */
StorageBuffer<DispatchCommand> clear_dispatch_buf_;
/** Pages to clear. */
StorageArrayBuffer<uint, SHADOW_MAX_PAGE> clear_page_buf_ = {"clear_page_buf"};
DispatchIndirectBuf clear_dispatch_buf_;
/** Array containing a compact stream of tiles to clear. */
StorageArrayBuffer<uint, SHADOW_RENDER_MAP_SIZE, true> clear_list_buf_ = {"clear_list_buf"};
/** Tile to pages mapping. */
StorageArrayBuffer<uint, SHADOW_RENDER_MAP_SIZE, true> render_map_buf_ = {"render_map_buf"};
/** View to viewport index mapping. */
StorageArrayBuffer<uint, SHADOW_VIEW_MAX, true> viewport_index_buf_ = {"viewport_index_buf"};
int3 dispatch_depth_scan_size_;
/* Ratio between tile-map pixel world "radius" and film pixel world "radius". */
@@ -254,17 +256,10 @@ class ShadowModule {
/** Multi-View containing a maximum of 64 view to be rendered with the shadow pipeline. */
View shadow_multi_view_ = {"ShadowMultiView", SHADOW_VIEW_MAX, true};
/** Tile to physical page mapping. This is an array texture with one layer per view. */
Texture render_map_tx_ = {"ShadowRenderMap",
GPU_R32UI,
GPU_TEXTURE_USAGE_SHADER_READ | GPU_TEXTURE_USAGE_SHADER_WRITE |
GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW,
int2(SHADOW_TILEMAP_RES),
64,
nullptr,
SHADOW_TILEMAP_LOD + 1};
/** An empty frame-buffer (no attachment) the size of a whole tile-map. */
/** Framebuffer with the atlas_tx attached. */
Framebuffer render_fb_;
/** Arrays of viewports to rendering each tile to. */
std::array<int4, 16> multi_viewports_;
/** \} */

View File

@@ -11,7 +11,7 @@ void main()
{
DRW_VIEW_FROM_RESOURCE_ID;
#ifdef MAT_SHADOW
shadow_interp.view_id = drw_view_id;
shadow_viewport_layer_set(int(drw_view_id), int(viewport_index_buf[drw_view_id]));
#endif
init_interface();

View File

@@ -9,7 +9,7 @@ void main()
{
DRW_VIEW_FROM_RESOURCE_ID;
#ifdef MAT_SHADOW
shadow_interp.view_id = drw_view_id;
shadow_viewport_layer_set(int(drw_view_id), int(viewport_index_buf[drw_view_id]));
#endif
init_interface();

View File

@@ -9,7 +9,7 @@ void main()
{
DRW_VIEW_FROM_RESOURCE_ID;
#ifdef MAT_SHADOW
shadow_interp.view_id = drw_view_id;
shadow_viewport_layer_set(int(drw_view_id), int(viewport_index_buf[drw_view_id]));
#endif
init_interface();

View File

@@ -11,7 +11,7 @@ void main()
{
DRW_VIEW_FROM_RESOURCE_ID;
#ifdef MAT_SHADOW
shadow_interp.view_id = drw_view_id;
shadow_viewport_layer_set(int(drw_view_id), int(viewport_index_buf[drw_view_id]));
#endif
init_interface();

View File

@@ -5,9 +5,7 @@ void main()
{
int index = int(gl_GlobalInvocationID.x);
if (index < tilemaps_clip_buf_len) {
tilemaps_clip_buf[index].clip_near_stored = 0;
tilemaps_clip_buf[index].clip_far_stored = 0;
tilemaps_clip_buf[index].clip_near = floatBitsToOrderedInt(-FLT_MAX);
tilemaps_clip_buf[index].clip_far = floatBitsToOrderedInt(FLT_MAX);
tilemaps_clip_buf[index].clip_far = floatBitsToOrderedInt(-FLT_MAX);
tilemaps_clip_buf[index].clip_near = floatBitsToOrderedInt(FLT_MAX);
}
}

View File

@@ -3,7 +3,7 @@
/** \a unormalized_uv is the uv coordinates for the whole tilemap [0..SHADOW_TILEMAP_RES]. */
vec2 shadow_page_uv_transform(
vec2 atlas_size, uvec2 page, uint lod, vec2 unormalized_uv, ivec2 tile_lod0_coord)
vec2 atlas_size, uvec3 page, uint lod, vec2 unormalized_uv, ivec2 tile_lod0_coord)
{
/* Bias uv sample for LODs since custom raster aligns LOD pixels instead of centering them. */
if (lod != 0) {
@@ -13,7 +13,7 @@ vec2 shadow_page_uv_transform(
vec2 target_tile = vec2(tile_lod0_coord >> lod);
vec2 page_uv = unormalized_uv * lod_scaling - target_tile;
/* Assumes atlas is squared. */
vec2 atlas_uv = (vec2(page) + min(page_uv, 0.99999)) * float(SHADOW_PAGE_RES) / atlas_size;
vec2 atlas_uv = (vec2(page.xy) + min(page_uv, 0.99999)) * float(SHADOW_PAGE_RES) / atlas_size;
return atlas_uv;
}
@@ -84,8 +84,6 @@ float shadow_slope_bias_get(vec2 atlas_size, LightData light, vec3 lNg, vec3 lP,
{
/* Compute coordinate inside the pixel we are sampling. */
vec2 uv_subpixel_coord = fract(uv * atlas_size);
/* Bias uv sample for LODs since custom raster aligns LOD pixels instead of centering them. */
uv_subpixel_coord += (lod > 0) ? -exp2(-1.0 - float(lod)) : 0.0;
/* Compute delta to the texel center (where the sample is). */
vec2 ndc_texel_center_delta = uv_subpixel_coord * 2.0 - 1.0;
/* Create a normal plane equation and go through the normal projection matrix. */
@@ -98,7 +96,7 @@ float shadow_slope_bias_get(vec2 atlas_size, LightData light, vec3 lNg, vec3 lP,
/* Compute slope to where the receiver should be by extending the plane to the texel center. */
float bias = dot(ndc_slope, ndc_texel_center_delta);
/* Bias for 1 pixel of the sampled LOD. */
bias /= ((SHADOW_TILEMAP_RES * SHADOW_PAGE_RES) >> lod);
bias /= float((SHADOW_TILEMAP_RES * SHADOW_PAGE_RES) >> lod);
return bias;
}
@@ -117,14 +115,16 @@ struct ShadowSample {
ShadowTileData tile;
};
float shadow_tile_depth_get(usampler2D atlas_tx, ShadowTileData tile, vec2 atlas_uv)
float shadow_tile_depth_get(usampler2DArray atlas_tx, ShadowTileData tile, vec2 atlas_uv)
{
if (!tile.is_allocated) {
/* Far plane distance but with a bias to make sure there will be no shadowing.
* But also not FLT_MAX since it can cause issue with projection. */
return 1.1;
}
return uintBitsToFloat(texture(atlas_tx, atlas_uv).r);
uint raw_bits = texture(atlas_tx, vec3(atlas_uv, float(tile.page.z))).r;
float depth = uintBitsToFloat(raw_bits);
return depth;
}
vec2 shadow_punctual_linear_depth(vec2 z, float near, float far)
@@ -137,11 +137,11 @@ vec2 shadow_punctual_linear_depth(vec2 z, float near, float far)
float shadow_directional_linear_depth(float z, float near, float far)
{
return z * (near - far) - near;
return z * (far - near) + near;
}
ShadowSample shadow_punctual_sample_get(
usampler2D atlas_tx, usampler2D tilemaps_tx, LightData light, vec3 lP, vec3 lNg)
usampler2DArray atlas_tx, usampler2D tilemaps_tx, LightData light, vec3 lP, vec3 lNg)
{
int face_id = shadow_punctual_face_index_get(lP);
lNg = shadow_punctual_local_position_to_face_local(face_id, lNg);
@@ -176,7 +176,7 @@ ShadowSample shadow_punctual_sample_get(
}
ShadowSample shadow_directional_sample_get(
usampler2D atlas_tx, usampler2D tilemaps_tx, LightData light, vec3 P, vec3 lNg)
usampler2DArray atlas_tx, usampler2D tilemaps_tx, LightData light, vec3 P, vec3 lNg)
{
vec3 lP = shadow_world_to_local(light, P);
ShadowCoordinates coord = shadow_directional_coordinates(light, lP);
@@ -198,13 +198,13 @@ ShadowSample shadow_directional_sample_get(
/* Receiver distance needs to also be increasing.
* Negate since Z distance follows blender camera convention of -Z as forward. */
float receiver_dist = -lP.z;
samp.bias *= near - far;
samp.bias *= far - near;
samp.occluder_delta = samp.occluder_dist - receiver_dist;
return samp;
}
ShadowSample shadow_sample(const bool is_directional,
usampler2D atlas_tx,
usampler2DArray atlas_tx,
usampler2D tilemaps_tx,
LightData light,
vec3 lL,

View File

@@ -23,6 +23,7 @@ void main()
ShadowTileData tile = shadow_tile_unpack(tiles_buf[tile_index]);
if (tile.is_used && !tile.is_allocated) {
shadow_page_alloc(tile);
tile.lod = lod;
tiles_buf[tile_index] = shadow_tile_pack(tile);
}

View File

@@ -9,9 +9,10 @@
void main()
{
uvec2 page_co = unpackUvec2x16(clear_page_buf[gl_GlobalInvocationID.z]);
uvec2 page_texel = page_co * pages_infos_buf.page_size + gl_GlobalInvocationID.xy;
uint page_packed = clear_list_buf[gl_GlobalInvocationID.z];
uvec3 page_co = shadow_page_unpack(page_packed);
page_co.xy = page_co.xy * SHADOW_PAGE_RES + gl_GlobalInvocationID.xy;
/* Clear to FLT_MAX instead of 1 so the far plane doesn't cast shadows onto farther objects. */
imageStore(atlas_img, ivec2(page_texel), uvec4(floatBitsToUint(FLT_MAX)));
imageStore(shadow_atlas_img, ivec3(page_co), uvec4(floatBitsToUint(FLT_MAX)));
}

View File

@@ -106,13 +106,13 @@ void main()
pages_infos_buf.page_cached_start = src;
pages_infos_buf.page_cached_end = end;
pages_infos_buf.page_alloc_count = 0;
pages_infos_buf.view_count = 0;
/* Stats. */
statistics_buf.page_used_count = 0;
statistics_buf.page_update_count = 0;
statistics_buf.page_allocated_count = 0;
statistics_buf.page_rendered_count = 0;
statistics_buf.view_needed_count = 0;
/* Wrap the cursor to avoid unsigned overflow. We do not do modulo arithmetic because it would
* produce a 0 length buffer if the buffer is full. */
@@ -123,7 +123,7 @@ void main()
}
/* Reset clear command indirect buffer. */
clear_dispatch_buf.num_groups_x = pages_infos_buf.page_size / SHADOW_PAGE_CLEAR_GROUP_SIZE;
clear_dispatch_buf.num_groups_y = pages_infos_buf.page_size / SHADOW_PAGE_CLEAR_GROUP_SIZE;
clear_dispatch_buf.num_groups_x = SHADOW_PAGE_RES / SHADOW_PAGE_CLEAR_GROUP_SIZE;
clear_dispatch_buf.num_groups_y = SHADOW_PAGE_RES / SHADOW_PAGE_CLEAR_GROUP_SIZE;
clear_dispatch_buf.num_groups_z = 0;
}

View File

@@ -37,9 +37,9 @@ void shadow_page_free(inout ShadowTileData tile)
int index = atomicAdd(pages_infos_buf.page_free_count, 1);
assert(index < SHADOW_MAX_PAGE);
/* Insert in heap. */
pages_free_buf[index] = packUvec2x16(tile.page);
pages_free_buf[index] = shadow_page_pack(tile.page);
/* Remove from tile. */
tile.page = uvec2(-1);
tile.page = uvec3(-1);
tile.is_cached = false;
tile.is_allocated = false;
}
@@ -55,7 +55,7 @@ void shadow_page_alloc(inout ShadowTileData tile)
return;
}
/* Insert in tile. */
tile.page = unpackUvec2x16(pages_free_buf[index]);
tile.page = shadow_page_unpack(pages_free_buf[index]);
tile.is_allocated = true;
tile.do_update = true;
/* Remove from heap. */
@@ -70,9 +70,9 @@ void shadow_page_cache_append(inout ShadowTileData tile, uint tile_index)
/* The page_cached_next is also wrapped in the defrag phase to avoid unsigned overflow. */
uint index = atomicAdd(pages_infos_buf.page_cached_next, 1u) % uint(SHADOW_MAX_PAGE);
/* Insert in heap. */
pages_cached_buf[index] = uvec2(packUvec2x16(tile.page), tile_index);
pages_cached_buf[index] = uvec2(shadow_page_pack(tile.page), tile_index);
/* Remove from tile. */
tile.page = uvec2(-1);
tile.page = uvec3(-1);
tile.cache_index = index;
tile.is_cached = true;
tile.is_allocated = false;
@@ -86,7 +86,7 @@ void shadow_page_cache_remove(inout ShadowTileData tile)
uint index = tile.cache_index;
/* Insert in tile. */
tile.page = unpackUvec2x16(pages_cached_buf[index].x);
tile.page = shadow_page_unpack(pages_cached_buf[index].x);
tile.cache_index = uint(-1);
tile.is_cached = false;
tile.is_allocated = true;

View File

@@ -35,7 +35,7 @@ void main()
float local_min = FLT_MAX;
float local_max = -FLT_MAX;
for (int i = 0; i < 8; i++) {
float z = dot(box.corners[i].xyz, light._back);
float z = dot(box.corners[i].xyz, -light._back);
local_min = min(local_min, z);
local_max = max(local_max, z);
}
@@ -59,14 +59,14 @@ void main()
if (gl_LocalInvocationID.x == 0) {
/* Final result. Min/Max of the whole dispatch. */
atomicMin(light_buf[l_idx].clip_far, global_min);
atomicMax(light_buf[l_idx].clip_near, global_max);
atomicMin(light_buf[l_idx].clip_near, global_min);
atomicMax(light_buf[l_idx].clip_far, global_max);
/* TODO(fclem): This feel unecessary but we currently have no indexing from
* tilemap to lights. This is because the lights are selected by culling phase. */
for (int i = light.tilemap_index; i <= light_tilemap_max_get(light); i++) {
int index = tilemaps_buf[i].clip_data_index;
atomicMin(tilemaps_clip_buf[index].clip_far, global_min);
atomicMax(tilemaps_clip_buf[index].clip_near, global_max);
atomicMin(tilemaps_clip_buf[index].clip_near, global_min);
atomicMax(tilemaps_clip_buf[index].clip_far, global_max);
}
}

View File

@@ -8,171 +8,171 @@
*/
#pragma BLENDER_REQUIRE(gpu_shader_utildefines_lib.glsl)
#pragma BLENDER_REQUIRE(gpu_shader_math_matrix_lib.glsl)
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shadow_tilemap_lib.glsl)
shared uint tile_updates_count;
shared ivec2 rect_min;
shared ivec2 rect_max;
shared int view_index;
void page_clear_buf_append(uint page_packed)
/**
* Select the smallest viewport that can contain the given rect of tiles to render.
* Returns the viewport index.
*/
int viewport_select(ivec2 rect_size)
{
uint clear_page_index = atomicAdd(clear_dispatch_buf.num_groups_z, 1u);
clear_page_buf[clear_page_index] = page_packed;
/* TODO(fclem): Experiment with non squared viewports. */
int max_dim = max(rect_size.x, rect_size.y);
/* Assumes max_dim is non-null. */
int power_of_two = int(findMSB(uint(max_dim)));
if ((1 << power_of_two) != max_dim) {
power_of_two += 1;
}
return power_of_two;
}
void page_tag_as_rendered(ivec2 tile_co, int tiles_index, int lod)
/**
 * Returns the size, in tiles, of the viewport with the given index.
 * The index is expected to be one returned by `viewport_select`.
 */
ivec2 viewport_size_get(int viewport_index)
{
int tile_index = shadow_tile_offset(tile_co, tiles_index, lod);
tiles_buf[tile_index] |= SHADOW_IS_RENDERED;
atomicAdd(statistics_buf.page_rendered_count, 1);
/* TODO(fclem): Experiment with non squared viewports. */
return ivec2(1 << viewport_index);
}
void main()
{
if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
tile_updates_count = uint(0);
}
barrier();
int tilemap_index = int(gl_GlobalInvocationID.z);
ivec2 tile_co = ivec2(gl_GlobalInvocationID.xy);
ivec2 atlas_texel = shadow_tile_coord_in_atlas(tile_co, tilemap_index);
ShadowTileMapData tilemap_data = tilemaps_buf[tilemap_index];
int lod_max = (tilemap_data.projection_type == SHADOW_PROJECTION_CUBEFACE) ? SHADOW_TILEMAP_LOD :
0;
int lod_valid = 0;
/* One bit per lod. */
int do_lod_update = 0;
/* Packed page (packUvec2x16) to render per LOD. */
uint updated_lod_page[SHADOW_TILEMAP_LOD + 1];
uvec2 page_valid;
bool is_cubemap = (tilemap_data.projection_type == SHADOW_PROJECTION_CUBEFACE);
int lod_max = is_cubemap ? SHADOW_TILEMAP_LOD : 0;
int valid_tile_index = -1;
/* With all threads (LOD0 size dispatch) load each lod tile from the highest lod
* to the lowest, keeping track of the lowest one allocated which will be used for shadowing.
* Also save which pages are to be updated. */
for (int lod = SHADOW_TILEMAP_LOD; lod >= 0; lod--) {
if (lod > lod_max) {
updated_lod_page[lod] = 0xFFFFFFFFu;
continue;
}
int tile_index = shadow_tile_offset(tile_co >> lod, tilemap_data.tiles_index, lod);
* This guarantees an O(1) lookup time.
* Add one render view per LOD that has tiles to be rendered. */
for (int lod = lod_max; lod >= 0; lod--) {
ivec2 tile_co_lod = tile_co >> lod;
int tile_index = shadow_tile_offset(tile_co_lod, tilemap_data.tiles_index, lod);
ShadowTileData tile = shadow_tile_unpack(tiles_buf[tile_index]);
if (tile.is_used && tile.do_update) {
do_lod_update = 1 << lod;
updated_lod_page[lod] = packUvec2x16(tile.page);
}
else {
updated_lod_page[lod] = 0xFFFFFFFFu;
/* Compute update area. */
if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
rect_min = ivec2(SHADOW_TILEMAP_RES);
rect_max = ivec2(0);
view_index = -1;
}
/* Save highest lod for this thread. */
if (tile.is_used && lod > 0) {
/* Reload the page in case there was an allocation in the valid thread. */
page_valid = tile.page;
lod_valid = lod;
}
else if (lod == 0 && lod_valid != 0 && !tile.is_allocated) {
/* If the tile is not used, store the valid LOD level in LOD0. */
tile.page = page_valid;
tile.lod = lod_valid;
/* This is not a real ownership. It is just a tag so that the shadowing is deemed correct. */
tile.is_allocated = true;
barrier();
bool lod_valid_thread = all(equal(tile_co, tile_co_lod << lod));
bool do_page_render = tile.is_used && tile.do_update && lod_valid_thread;
if (do_page_render) {
atomicMin(rect_min.x, tile_co_lod.x);
atomicMin(rect_min.y, tile_co_lod.y);
atomicMax(rect_max.x, tile_co_lod.x + 1);
atomicMax(rect_max.y, tile_co_lod.y + 1);
}
if (lod == 0) {
imageStore(tilemaps_img, atlas_texel, uvec4(shadow_tile_pack(tile)));
}
}
barrier();
if (do_lod_update > 0) {
atomicAdd(tile_updates_count, 1u);
}
int viewport_index = viewport_select(rect_max - rect_min);
ivec2 viewport_size = viewport_size_get(viewport_index);
barrier();
/* Issue one view if there is an update in the LOD. */
if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
bool lod_has_update = rect_min.x < rect_max.x;
if (lod_has_update) {
view_index = atomicAdd(statistics_buf.view_needed_count, 1);
if (view_index < SHADOW_VIEW_MAX) {
/* Setup the view. */
viewport_index_buf[view_index] = viewport_index;
if (all(equal(gl_LocalInvocationID, uvec3(0)))) {
/* No update by default. */
view_index = 64;
view_infos_buf[view_index].viewmat = tilemap_data.viewmat;
view_infos_buf[view_index].viewinv = inverse(tilemap_data.viewmat);
if (tile_updates_count > 0) {
view_index = atomicAdd(pages_infos_buf.view_count, 1);
if (view_index < 64) {
view_infos_buf[view_index].viewmat = tilemap_data.viewmat;
view_infos_buf[view_index].viewinv = inverse(tilemap_data.viewmat);
float lod_res = float(SHADOW_TILEMAP_RES >> lod);
/* TODO(fclem): These should be the culling planes. */
// vec2 cull_region_start = (vec2(rect_min) / lod_res) * 2.0 - 1.0;
// vec2 cull_region_end = (vec2(rect_max) / lod_res) * 2.0 - 1.0;
vec2 view_start = (vec2(rect_min) / lod_res) * 2.0 - 1.0;
vec2 view_end = (vec2(rect_min + viewport_size) / lod_res) * 2.0 - 1.0;
if (tilemap_data.projection_type != SHADOW_PROJECTION_CUBEFACE) {
int clip_index = tilemap_data.clip_data_index;
/* For directional lights, we need to modify winmat to encompass all casters. */
float clip_far = -tilemaps_clip_buf[clip_index].clip_far_stored;
float clip_near = -tilemaps_clip_buf[clip_index].clip_near_stored;
tilemap_data.winmat[2][2] = -2.0 / (clip_far - clip_near);
tilemap_data.winmat[3][2] = -(clip_far + clip_near) / (clip_far - clip_near);
float clip_far = tilemaps_clip_buf[clip_index].clip_far_stored;
float clip_near = tilemaps_clip_buf[clip_index].clip_near_stored;
mat4x4 winmat;
if (tilemap_data.projection_type != SHADOW_PROJECTION_CUBEFACE) {
view_start *= tilemap_data.half_size;
view_end *= tilemap_data.half_size;
view_start += tilemap_data.center_offset;
view_end += tilemap_data.center_offset;
winmat = projection_orthographic(
view_start.x, view_end.x, view_start.y, view_end.y, clip_near, clip_far);
}
else {
view_start *= clip_near;
view_end *= clip_near;
winmat = projection_perspective(
view_start.x, view_end.x, view_start.y, view_end.y, clip_near, clip_far);
}
view_infos_buf[view_index].winmat = winmat;
view_infos_buf[view_index].wininv = inverse(winmat);
}
view_infos_buf[view_index].winmat = tilemap_data.winmat;
view_infos_buf[view_index].wininv = inverse(tilemap_data.winmat);
}
}
barrier();
bool lod_is_rendered = (view_index >= 0) && (view_index < SHADOW_VIEW_MAX);
if (lod_is_rendered && lod_valid_thread) {
/* Tile coordinate relative to chosen viewport origin. */
ivec2 viewport_tile_co = tile_co_lod - rect_min;
/* We need to add page indirection to the render map for the whole viewport even if this one
* might extend outside of the shadowmap range. To this end, we need to wrap the threads to
* always cover the whole mip. This is because the viewport cannot be bigger than the mip
* level itself. */
int lod_res = SHADOW_TILEMAP_RES >> lod;
ivec2 relative_tile_co = (viewport_tile_co + lod_res) % lod_res;
if (all(lessThan(relative_tile_co, viewport_size))) {
uint page_packed = shadow_page_pack(tile.page);
/* Add page to render map. */
int render_page_index = shadow_render_page_index_get(view_index, relative_tile_co);
render_map_buf[render_page_index] = do_page_render ? page_packed : 0xFFFFFFFFu;
if (do_page_render) {
/* Tag tile as rendered. There is a barrier after the read. So it is safe. */
tiles_buf[tile_index] |= SHADOW_IS_RENDERED;
/* Add page to clear list. */
uint clear_page_index = atomicAdd(clear_dispatch_buf.num_groups_z, 1u);
clear_list_buf[clear_page_index] = page_packed;
/* Statistics. */
atomicAdd(statistics_buf.page_rendered_count, 1);
}
}
}
if (tile.is_used && tile.is_allocated && (!tile.do_update || lod_is_rendered)) {
/* Save highest lod for this thread. */
valid_tile_index = tile_index;
}
}
barrier();
if (view_index < 64) {
ivec3 render_map_texel = ivec3(tile_co, view_index);
/* Store page indirection for rendering. Update every texel in the view array level. */
if (true) {
imageStore(render_map_lod0_img, render_map_texel, uvec4(updated_lod_page[0]));
if (updated_lod_page[0] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[0]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 0);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 1u))) {
imageStore(render_map_lod1_img, render_map_texel, uvec4(updated_lod_page[1]));
if (updated_lod_page[1] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[1]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 1);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 2u))) {
imageStore(render_map_lod2_img, render_map_texel, uvec4(updated_lod_page[2]));
if (updated_lod_page[2] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[2]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 2);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 3u))) {
imageStore(render_map_lod3_img, render_map_texel, uvec4(updated_lod_page[3]));
if (updated_lod_page[3] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[3]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 3);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 4u))) {
imageStore(render_map_lod4_img, render_map_texel, uvec4(updated_lod_page[4]));
if (updated_lod_page[4] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[4]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 4);
}
}
render_map_texel.xy >>= 1;
if (all(equal(tile_co, render_map_texel.xy << 5u))) {
imageStore(render_map_lod5_img, render_map_texel, uvec4(updated_lod_page[5]));
if (updated_lod_page[5] != 0xFFFFFFFFu) {
page_clear_buf_append(updated_lod_page[5]);
page_tag_as_rendered(render_map_texel.xy, tilemap_data.tiles_index, 5);
}
}
}
/* Store the highest LOD valid page for rendering. */
uint tile_packed = (valid_tile_index != -1) ? tiles_buf[valid_tile_index] : SHADOW_NO_DATA;
imageStore(tilemaps_img, atlas_texel, uvec4(tile_packed));
if (all(equal(gl_GlobalInvocationID, uvec3(0)))) {
/* Clamp it as it can underflow if there are too many tiles present on screen. */

View File

@@ -11,7 +11,7 @@
#pragma BLENDER_REQUIRE(common_math_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_shadow_tilemap_lib.glsl)
shared int directional_range_changed;
shared bool directional_range_changed;
ShadowTileDataPacked init_tile_data(ShadowTileDataPacked tile, bool do_update)
{
@@ -36,6 +36,8 @@ void main()
/* Reset shift to not tag for update more than once per sync cycle. */
tilemaps_buf[tilemap_index].grid_shift = ivec2(0);
directional_range_changed = false;
int clip_index = tilemap.clip_data_index;
if (clip_index == -1) {
/* Noop. This is the case for unused tilemaps that are getting pushed to the free heap. */
@@ -46,13 +48,18 @@ void main()
float clip_far_new = orderedIntBitsToFloat(clip_data.clip_far);
bool near_changed = clip_near_new != clip_data.clip_near_stored;
bool far_changed = clip_far_new != clip_data.clip_far_stored;
directional_range_changed = int(near_changed || far_changed);
directional_range_changed = near_changed || far_changed;
/* NOTE(fclem): This assumes clip near/far are computed each time the init phase runs. */
tilemaps_clip_buf[clip_index].clip_near_stored = clip_near_new;
tilemaps_clip_buf[clip_index].clip_far_stored = clip_far_new;
/* Reset for next update. */
tilemaps_clip_buf[clip_index].clip_near = floatBitsToOrderedInt(-FLT_MAX);
tilemaps_clip_buf[clip_index].clip_far = floatBitsToOrderedInt(FLT_MAX);
tilemaps_clip_buf[clip_index].clip_near = floatBitsToOrderedInt(FLT_MAX);
tilemaps_clip_buf[clip_index].clip_far = floatBitsToOrderedInt(-FLT_MAX);
}
else {
/* For cubefaces, simply use the light near and far distances. */
tilemaps_clip_buf[clip_index].clip_near_stored = tilemap.clip_near;
tilemaps_clip_buf[clip_index].clip_far_stored = tilemap.clip_far;
}
}
@@ -68,7 +75,7 @@ void main()
bool do_update = !in_range_inclusive(tile_shifted, ivec2(0), ivec2(SHADOW_TILEMAP_RES - 1));
/* TODO(fclem): Might be better to resize the depth stored instead of a full render update. */
if (tilemap.projection_type != SHADOW_PROJECTION_CUBEFACE && directional_range_changed != 0) {
if (directional_range_changed) {
do_update = true;
}

View File

@@ -246,3 +246,18 @@ Pyramid shadow_tilemap_cubeface_bounds(ShadowTileMapData tilemap,
}
/** \} */
/* ---------------------------------------------------------------------- */
/** \name Render map layout.
*
* Since a view can cover at most the number of tiles contained in LOD0,
* index every LOD as if it were LOD0.
* \{ */
/**
 * Return the linear index into the render map for a tile of a given view.
 * Each view owns a slice of `SHADOW_TILEMAP_LOD0_LEN` entries, addressed row by row
 * with a row stride of `SHADOW_TILEMAP_RES`, whatever the LOD being rendered.
 */
int shadow_render_page_index_get(int view_index, ivec2 tile_coordinate_in_lod)
{
  /* Start of the slice reserved for this view. */
  int view_base = view_index * SHADOW_TILEMAP_LOD0_LEN;
  /* Offset of the tile's row inside the slice. */
  int row_offset = tile_coordinate_in_lod.y * SHADOW_TILEMAP_RES;
  return view_base + row_offset + tile_coordinate_in_lod.x;
}
/** \} */

View File

@@ -124,3 +124,21 @@ void init_interface()
drw_ResourceID_iface.resource_index = resource_id;
#endif
}
#ifdef GPU_VERTEX_SHADER
/**
 * Route the current vertex to a viewport and a framebuffer layer.
 * `view_id` is written to the layer index and `lod` to the viewport index.
 */
void shadow_viewport_layer_set(int view_id, int lod)
{
  /* The viewport is chosen from the `lod` argument. */
  gpu_ViewportIndex = lod;
  /* Rendering still targets a layered framebuffer in the Metal + Tile Based Renderer case:
   * correct depth buffering there requires that views do not overlap each other.
   * Other platforms do not care much, so the layer doubles as the carrier of the view id. */
  gpu_Layer = view_id;
}
#endif
#ifdef GPU_FRAGMENT_SHADER
/**
 * Return the id of the view being rendered by this fragment.
 * The id is read back from the layer index, which `shadow_viewport_layer_set()` writes.
 */
int shadow_view_id_get()
{
  int view_id = gpu_Layer;
  return view_id;
}
#endif

View File

@@ -15,40 +15,7 @@
#pragma BLENDER_REQUIRE(eevee_nodetree_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_transparency_lib.glsl)
#pragma BLENDER_REQUIRE(eevee_sampling_lib.glsl)
/**
 * Write `depth` into the shadow atlas page backing the given LOD, using an atomic min.
 *
 * \param texel_co: Texel coordinate of the fragment, expressed in LOD0 texels.
 * \param lod: LOD level to write to.
 * \param tile_co: Tile coordinate of the fragment in LOD0; shifted by `lod` for the lookup.
 * \param depth: Depth value to store (its float bits are compared as unsigned integers).
 *
 * Only the fragment whose LOD0 texel is the one chosen as closest to the LOD texel
 * performs the write; every other fragment returns early. Nothing is written either
 * when the render map holds no valid page (0xFFFFFFFF) for this tile.
 */
void write_depth(ivec2 texel_co, const int lod, ivec2 tile_co, float depth)
{
/* Texel coordinate inside the LOD, and its corner expressed back in LOD0 texels. */
ivec2 texel_co_lod = texel_co >> lod;
ivec2 lod_corner_in_lod0 = texel_co_lod << lod;
/* Add half of the lod to get the top right pixel nearest to the lod pixel.
* This way we never get more than half a LOD0 pixel of offset from the center of any LOD.
* This offset is taken into account during sampling. */
const int lod_half_stride_in_lod0 = (1 << lod) / 2;
ivec2 closest_lod0_texel = lod_corner_in_lod0 + lod_half_stride_in_lod0;
/* Only one LOD0 fragment per LOD texel is allowed to write. */
if (!all(equal(closest_lod0_texel, texel_co))) {
return;
}
/* Fetch the page indirection for this tile, in the mip matching the LOD, for the current view. */
ivec3 render_map_coord = ivec3(tile_co >> lod, shadow_interp.view_id);
uint page_packed = texelFetch(shadow_render_map_tx, render_map_coord, lod).r;
/* Return if no valid page. */
if (page_packed == 0xFFFFFFFFu) {
return;
}
/* Convert page coordinate + texel inside the page to an atlas texel. */
ivec2 page = ivec2(unpackUvec2x16(page_packed));
ivec2 texel_in_page = texel_co_lod % pages_infos_buf.page_size;
ivec2 out_texel = page * pages_infos_buf.page_size + texel_in_page;
uint u_depth = floatBitsToUint(depth);
/* Quantization bias. Equivalent to nextafter in C without all the safety. 1 is not enough. */
u_depth += 2;
/* TODO(Metal): For Metal, textures will need to be viewed as buffers to workaround missing image
* atomics support. */
imageAtomicMin(shadow_atlas_img, out_texel, u_depth);
}
#pragma BLENDER_REQUIRE(eevee_shadow_tilemap_lib.glsl)
void main()
{
@@ -67,27 +34,26 @@ void main()
}
#endif
drw_view_id = shadow_interp.view_id;
#ifdef USE_ATOMIC
ivec2 texel_co = ivec2(gl_FragCoord.xy);
ivec2 tile_co = texel_co / pages_infos_buf.page_size;
float depth = gl_FragCoord.z;
float slope_bias = fwidth(depth);
write_depth(texel_co, 0, tile_co, depth + slope_bias);
/* Using bitwise ops is way faster than integer ops. */
const int page_shift = SHADOW_PAGE_LOD;
const int page_mask = ~(0xFFFFFFFF << SHADOW_PAGE_LOD);
/* Only needed for local lights. */
bool is_persp = (drw_view.winmat[3][3] == 0.0);
if (is_persp) {
/* Note that even if texel center is offset, we store unmodified depth.
* We increase bias instead at sampling time. */
#if SHADOW_TILEMAP_LOD != 5
# error This needs to be adjusted
ivec2 tile_co = texel_co >> page_shift;
ivec2 texel_page = texel_co & page_mask;
int view_index = shadow_view_id_get();
int render_page_index = shadow_render_page_index_get(view_index, tile_co);
uint page_packed = render_map_buf[render_page_index];
ivec3 page = ivec3(shadow_page_unpack(page_packed));
ivec3 out_texel = ivec3((page.xy << page_shift) | texel_page, page.z);
uint u_depth = floatBitsToUint(gl_FragCoord.z + fwidth(gl_FragCoord.z));
/* Quantization bias. Equivalent to `nextafter()` in C without all the safety. */
u_depth += 2;
imageAtomicMin(shadow_atlas_img, out_texel, u_depth);
#endif
write_depth(texel_co, 1, tile_co, depth + slope_bias * 2.0);
write_depth(texel_co, 2, tile_co, depth + slope_bias * 4.0);
write_depth(texel_co, 3, tile_co, depth + slope_bias * 8.0);
write_depth(texel_co, 4, tile_co, depth + slope_bias * 16.0);
write_depth(texel_co, 5, tile_co, depth + slope_bias * 32.0);
}
}

View File

@@ -187,21 +187,26 @@ GPU_SHADER_CREATE_INFO(eevee_surf_world)
"eevee_camera",
"eevee_utility_texture");
GPU_SHADER_INTERFACE_INFO(eevee_shadow_iface, "shadow_interp").flat(Type::UINT, "view_id");
GPU_SHADER_CREATE_INFO(eevee_surf_shadow)
.define("DRW_VIEW_LEN", "64")
.define("MAT_SHADOW")
.define("USE_ATOMIC")
.vertex_out(eevee_surf_iface)
.vertex_out(eevee_surf_flat_iface)
.vertex_out(eevee_shadow_iface)
.sampler(SHADOW_RENDER_MAP_SLOT, ImageType::UINT_2D_ARRAY, "shadow_render_map_tx")
.image(SHADOW_ATLAS_SLOT,
.storage_buf(SHADOW_RENDER_MAP_BUF_SLOT,
Qualifier::READ,
"uint",
"render_map_buf[SHADOW_RENDER_MAP_SIZE]")
.storage_buf(SHADOW_VIEWPORT_INDEX_BUF_SLOT,
Qualifier::READ,
"uint",
"viewport_index_buf[SHADOW_VIEW_MAX]")
.storage_buf(SHADOW_PAGE_INFO_SLOT, Qualifier::READ, "ShadowPagesInfoData", "pages_infos_buf")
.image(SHADOW_ATLAS_IMG_SLOT,
GPU_R32UI,
Qualifier::READ_WRITE,
ImageType::UINT_2D,
ImageType::UINT_2D_ARRAY,
"shadow_atlas_img")
.storage_buf(SHADOW_PAGE_INFO_SLOT, Qualifier::READ, "ShadowPagesInfoData", "pages_infos_buf")
.fragment_source("eevee_surf_shadow_frag.glsl")
.additional_info("eevee_camera", "eevee_utility_texture", "eevee_sampling_data");

View File

@@ -14,6 +14,7 @@
* explicitly as uint for code generation, as the MSLShaderGenerator needs to be able to
* distinguish between classes and fundamental types during code generation. */
#define SHADOW_TILE_DATA_PACKED "uint"
#define SHADOW_PAGE_PACKED "uint"
GPU_SHADER_CREATE_INFO(eevee_shadow_clipmap_clear)
.do_static_compilation(true)
@@ -160,21 +161,17 @@ GPU_SHADER_CREATE_INFO(eevee_shadow_tilemap_finalize)
.do_static_compilation(true)
.typedef_source("draw_shader_shared.h")
.local_group_size(SHADOW_TILEMAP_RES, SHADOW_TILEMAP_RES)
.storage_buf(0, Qualifier::READ_WRITE, "ShadowTileMapData", "tilemaps_buf[]")
.storage_buf(0, Qualifier::READ, "ShadowTileMapData", "tilemaps_buf[]")
.storage_buf(1, Qualifier::READ_WRITE, SHADOW_TILE_DATA_PACKED, "tiles_buf[]")
.storage_buf(2, Qualifier::READ_WRITE, "ShadowPagesInfoData", "pages_infos_buf")
.storage_buf(3, Qualifier::WRITE, "ViewMatrices", "view_infos_buf[64]")
.storage_buf(3, Qualifier::WRITE, "ViewMatrices", "view_infos_buf[SHADOW_VIEW_MAX]")
.storage_buf(4, Qualifier::READ_WRITE, "ShadowStatistics", "statistics_buf")
.storage_buf(5, Qualifier::READ_WRITE, "DispatchCommand", "clear_dispatch_buf")
.storage_buf(6, Qualifier::READ_WRITE, "uint", "clear_page_buf[]")
.storage_buf(7, Qualifier::READ_WRITE, "ShadowTileMapClip", "tilemaps_clip_buf[]")
.storage_buf(6, Qualifier::WRITE, SHADOW_PAGE_PACKED, "clear_list_buf[SHADOW_RENDER_MAP_SIZE]")
.storage_buf(7, Qualifier::WRITE, SHADOW_PAGE_PACKED, "render_map_buf[SHADOW_RENDER_MAP_SIZE]")
.storage_buf(8, Qualifier::WRITE, "uint", "viewport_index_buf[SHADOW_VIEW_MAX]")
.storage_buf(9, Qualifier::READ, "ShadowTileMapClip", "tilemaps_clip_buf[]")
.image(0, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D, "tilemaps_img")
.image(1, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod0_img")
.image(2, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod1_img")
.image(3, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod2_img")
.image(4, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod3_img")
.image(5, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod4_img")
.image(6, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D_ARRAY, "render_map_lod5_img")
.additional_info("eevee_shared")
.compute_source("eevee_shadow_tilemap_finalize_comp.glsl");
@@ -182,8 +179,12 @@ GPU_SHADER_CREATE_INFO(eevee_shadow_page_clear)
.do_static_compilation(true)
.local_group_size(SHADOW_PAGE_CLEAR_GROUP_SIZE, SHADOW_PAGE_CLEAR_GROUP_SIZE)
.storage_buf(2, Qualifier::READ, "ShadowPagesInfoData", "pages_infos_buf")
.storage_buf(6, Qualifier::READ, "uint", "clear_page_buf[]")
.image(0, GPU_R32UI, Qualifier::WRITE, ImageType::UINT_2D, "atlas_img")
.storage_buf(6, Qualifier::READ, SHADOW_PAGE_PACKED, "clear_list_buf[SHADOW_RENDER_MAP_SIZE]")
.image(SHADOW_ATLAS_IMG_SLOT,
GPU_R32UI,
Qualifier::READ_WRITE,
ImageType::UINT_2D_ARRAY,
"shadow_atlas_img")
.additional_info("eevee_shared")
.compute_source("eevee_shadow_page_clear_comp.glsl");
@@ -194,7 +195,7 @@ GPU_SHADER_CREATE_INFO(eevee_shadow_page_clear)
* \{ */
GPU_SHADER_CREATE_INFO(eevee_shadow_data)
.sampler(SHADOW_ATLAS_TEX_SLOT, ImageType::UINT_2D, "shadow_atlas_tx")
.sampler(SHADOW_ATLAS_TEX_SLOT, ImageType::UINT_2D_ARRAY, "shadow_atlas_tx")
.sampler(SHADOW_TILEMAPS_TEX_SLOT, ImageType::UINT_2D, "shadow_tilemaps_tx");
/** \} */

View File

@@ -630,6 +630,7 @@ class Texture : NonCopyable {
float *data = nullptr,
int mip_len = 1)
{
BLI_assert(layers > 0);
return ensure_impl(extent, layers, 0, mip_len, format, usage, data, true, false);
}
@@ -657,6 +658,7 @@ class Texture : NonCopyable {
float *data = nullptr,
int mip_len = 1)
{
BLI_assert(layers > 0);
return ensure_impl(UNPACK2(extent), layers, mip_len, format, usage, data, true, false);
}

View File

@@ -50,12 +50,12 @@ static void test_eevee_shadow_shift_clear()
{
ShadowTileData tile;
tile.page = uint2(1, 2);
tile.page = uint3(1, 2, 0);
tile.is_used = true;
tile.do_update = true;
tiles_data[tile_lod0] = shadow_tile_pack(tile);
tile.page = uint2(3, 4);
tile.page = uint3(3, 2, 4);
tile.is_used = false;
tile.do_update = false;
tiles_data[tile_lod1] = shadow_tile_pack(tile);
@@ -72,19 +72,19 @@ static void test_eevee_shadow_shift_clear()
pass.bind_ssbo("tiles_buf", tiles_data);
pass.bind_ssbo("pages_cached_buf", pages_cached_data_);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tilemaps_data.read();
tiles_data.read();
EXPECT_EQ(tilemaps_data[0].grid_offset, int2(0));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod0]).page, uint2(1, 2));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod0]).page, uint3(1, 2, 0));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod0]).is_used, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod0]).do_update, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod1]).page, uint2(3, 4));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod1]).page, uint3(3, 2, 4));
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod1]).is_used, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[tile_lod1]).do_update, true);
@@ -99,9 +99,27 @@ static void test_eevee_shadow_shift()
GPU_render_begin();
ShadowTileMapDataBuf tilemaps_data = {"tilemaps_data"};
ShadowTileDataBuf tiles_data = {"tiles_data"};
ShadowTileMapClipBuf tilemaps_clip = {"tilemaps_clip"};
ShadowPageCacheBuf pages_cached_data_ = {"pages_cached_data_"};
StorageArrayBuffer<ShadowTileMapClip, SHADOW_MAX_TILEMAP> tilemaps_clip = {"tilemaps_clip"};
ShadowPageCacheBuf pages_cached_data = {"pages_cached_data"};
auto tile_co_to_page = [](int2 co) {
int page = co.x + co.y * SHADOW_TILEMAP_RES;
return uint3((page % SHADOW_PAGE_PER_ROW),
(page / SHADOW_PAGE_PER_ROW) % SHADOW_PAGE_PER_COL,
(page / SHADOW_PAGE_PER_LAYER));
};
{
ShadowTileMapClip clip = {};
clip.clip_near_stored = 0.0;
clip.clip_far_stored = 1.0;
clip.clip_near = 0x00000000; /* floatBitsToOrderedInt(0.0) */
clip.clip_far = 0x3F800000; /* floatBitsToOrderedInt(1.0) */
tilemaps_clip[0] = clip;
tilemaps_clip.push_update();
}
{
ShadowTileMapData tilemap = {};
tilemap.tiles_index = 0;
@@ -114,7 +132,6 @@ static void test_eevee_shadow_shift()
tilemaps_data.push_update();
}
{
ShadowTileData tile = shadow_tile_unpack(ShadowTileDataPacked(SHADOW_NO_DATA));
for (auto x : IndexRange(SHADOW_TILEMAP_RES)) {
@@ -122,7 +139,7 @@ static void test_eevee_shadow_shift()
tile.is_allocated = true;
tile.is_rendered = true;
tile.do_update = true;
tile.page = uint2(x, y);
tile.page = tile_co_to_page(int2(x, y));
tiles_data[x + y * SHADOW_TILEMAP_RES] = shadow_tile_pack(tile);
}
}
@@ -137,31 +154,33 @@ static void test_eevee_shadow_shift()
pass.bind_ssbo("tilemaps_buf", tilemaps_data);
pass.bind_ssbo("tilemaps_clip_buf", tilemaps_clip);
pass.bind_ssbo("tiles_buf", tiles_data);
pass.bind_ssbo("pages_cached_buf", pages_cached_data_);
pass.bind_ssbo("pages_cached_buf", pages_cached_data);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tilemaps_data.read();
tiles_data.read();
EXPECT_EQ(tilemaps_data[0].grid_offset, int2(0));
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).page, uint2(SHADOW_TILEMAP_RES - 1, 2));
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).page,
tile_co_to_page(int2(SHADOW_TILEMAP_RES - 1, 2)));
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).do_update, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).is_rendered, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0]).is_allocated, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).page, uint2(0, 2));
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).page, tile_co_to_page(int2(0, 2)));
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).do_update, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).is_rendered, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1]).is_allocated, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0 + SHADOW_TILEMAP_RES * 2]).page,
uint2(SHADOW_TILEMAP_RES - 1, 4));
tile_co_to_page(int2(SHADOW_TILEMAP_RES - 1, 4)));
EXPECT_EQ(shadow_tile_unpack(tiles_data[0 + SHADOW_TILEMAP_RES * 2]).do_update, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0 + SHADOW_TILEMAP_RES * 2]).is_rendered, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[0 + SHADOW_TILEMAP_RES * 2]).is_allocated, true);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).page, uint2(0, 4));
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).page,
tile_co_to_page(int2(0, 4)));
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).do_update, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).is_rendered, false);
EXPECT_EQ(shadow_tile_unpack(tiles_data[1 + SHADOW_TILEMAP_RES * 2]).is_allocated, true);
@@ -234,9 +253,9 @@ static void test_eevee_shadow_tag_update()
pass.bind_ssbo("bounds_buf", &manager.bounds_buf.current());
pass.bind_ssbo("resource_ids_buf", curr_casters_updated);
pass.dispatch(int3(curr_casters_updated.size(), 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tiles_data.read();
@@ -358,8 +377,10 @@ static void test_eevee_shadow_free()
int page_free_count = SHADOW_MAX_PAGE - 6;
for (uint i : IndexRange(2, page_free_count)) {
uint2 page = {i % SHADOW_PAGE_PER_ROW, i / SHADOW_PAGE_PER_ROW};
pages_free_data[i] = page.x | (page.y << 16u);
uint3 page = uint3((i % SHADOW_PAGE_PER_ROW),
(i / SHADOW_PAGE_PER_ROW) % SHADOW_PAGE_PER_COL,
(i / SHADOW_PAGE_PER_LAYER));
pages_free_data[i] = shadow_page_pack(page);
}
pages_free_data.push_update();
@@ -380,6 +401,9 @@ static void test_eevee_shadow_free()
{
ShadowTileData tile;
tiles_data.clear_to_zero();
tiles_data.read();
/* is_orphaned = true */
tile.is_used = false;
tile.do_update = true;
@@ -436,10 +460,10 @@ static void test_eevee_shadow_free()
pass.bind_ssbo("pages_free_buf", pages_free_data);
pass.bind_ssbo("pages_cached_buf", pages_cached_data);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tiles_data.read();
pages_infos_data.read();
@@ -475,7 +499,7 @@ class TestDefrag {
ShadowPageHeapBuf pages_free_data = {"PagesFreeBuf"};
ShadowPageCacheBuf pages_cached_data = {"PagesCachedBuf"};
ShadowPagesInfoDataBuf pages_infos_data = {"PagesInfosBuf"};
StorageBuffer<DispatchCommand> clear_dispatch_buf;
StorageBuffer<DispatchCommand> clear_draw_buf;
ShadowStatisticsBuf statistics_buf = {"statistics_buf"};
public:
@@ -546,12 +570,12 @@ class TestDefrag {
pass.bind_ssbo("pages_free_buf", pages_free_data);
pass.bind_ssbo("pages_cached_buf", pages_cached_data);
pass.bind_ssbo("statistics_buf", statistics_buf);
pass.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf);
pass.bind_ssbo("clear_draw_buf", clear_draw_buf);
pass.dispatch(int3(1, 1, 1));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tiles_data.read();
pages_cached_data.read();
@@ -631,10 +655,11 @@ class TestAlloc {
pages_infos_data.page_cached_next = 0u;
pages_infos_data.page_cached_start = 0u;
pages_infos_data.page_cached_end = 0u;
pages_infos_data.view_count = 0u;
pages_infos_data.page_size = 256u;
pages_infos_data.push_update();
statistics_buf.view_needed_count = 0;
statistics_buf.push_update();
int tile_allocated = tiles_index * SHADOW_TILEDATA_PER_TILEMAP + 5;
int tile_free = tiles_index * SHADOW_TILEDATA_PER_TILEMAP + 6;
@@ -672,10 +697,10 @@ class TestAlloc {
pass.bind_ssbo("pages_cached_buf", pages_cached_data);
pass.bind_ssbo("statistics_buf", statistics_buf);
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE);
tiles_data.read();
pages_infos_data.read();
@@ -711,7 +736,8 @@ static void test_eevee_shadow_finalize()
ShadowPageCacheBuf pages_cached_data = {"PagesCachedBuf"};
ShadowPagesInfoDataBuf pages_infos_data = {"PagesInfosBuf"};
ShadowStatisticsBuf statistics_buf = {"statistics_buf"};
ShadowTileMapClipBuf tilemaps_clip = {"tilemaps_clip"};
StorageArrayBuffer<ShadowTileMapClip, SHADOW_MAX_TILEMAP, false> tilemaps_clip = {
"tilemaps_clip"};
const uint lod0_len = SHADOW_TILEMAP_LOD0_LEN;
const uint lod1_len = SHADOW_TILEMAP_LOD1_LEN;
@@ -727,7 +753,7 @@ static void test_eevee_shadow_finalize()
const uint lod5_ofs = lod4_ofs + lod4_len;
for (auto i : IndexRange(SHADOW_TILEDATA_PER_TILEMAP)) {
tiles_data[i] = 0;
tiles_data[i] = SHADOW_NO_DATA;
}
{
@@ -735,52 +761,71 @@ static void test_eevee_shadow_finalize()
tile.is_used = true;
tile.is_allocated = true;
tile.page = uint2(1, 0);
tile.page = uint3(1, 0, 0);
tile.do_update = false;
tiles_data[lod0_ofs] = shadow_tile_pack(tile);
tile.page = uint2(2, 0);
tile.page = uint3(2, 0, 0);
tile.do_update = false;
tiles_data[lod1_ofs] = shadow_tile_pack(tile);
tile.page = uint2(3, 0);
tile.page = uint3(3, 0, 0);
tile.do_update = true;
tiles_data[lod2_ofs] = shadow_tile_pack(tile);
tile.page = uint2(4, 0);
tile.do_update = false;
tile.page = uint3(0, 1, 0);
tile.do_update = true;
tiles_data[lod3_ofs] = shadow_tile_pack(tile);
tile.page = uint2(5, 0);
tile.page = uint3(1, 1, 0);
tile.do_update = true;
tiles_data[lod4_ofs] = shadow_tile_pack(tile);
tile.page = uint2(6, 0);
tile.page = uint3(2, 1, 0);
tile.do_update = true;
tiles_data[lod5_ofs] = shadow_tile_pack(tile);
tile.page = uint2(7, 0);
tile.page = uint3(3, 1, 0);
tile.do_update = true;
tiles_data[lod0_ofs + 8] = shadow_tile_pack(tile);
tiles_data[lod0_ofs + 31] = shadow_tile_pack(tile);
tile.page = uint3(0, 2, 0);
tile.do_update = true;
tiles_data[lod3_ofs + 8] = shadow_tile_pack(tile);
tile.page = uint3(1, 2, 0);
tile.do_update = true;
tiles_data[lod0_ofs + 32 * 16 - 8] = shadow_tile_pack(tile);
tiles_data.push_update();
}
{
ShadowTileMapData tilemap = {};
tilemap.viewmat = float4x4::identity();
tilemap.tiles_index = 0;
tilemap.clip_data_index = 0;
tilemap.projection_type = SHADOW_PROJECTION_CUBEFACE;
tilemaps_data.append(tilemap);
tilemaps_data.push_update();
}
{
ShadowTileMapClip clip = {};
clip.clip_far_stored = 10.0f;
clip.clip_near_stored = 1.0f;
tilemaps_clip[0] = clip;
tilemaps_clip.push_update();
}
{
statistics_buf.view_needed_count = 0;
statistics_buf.push_update();
}
{
pages_infos_data.page_free_count = -5;
pages_infos_data.page_alloc_count = 0;
pages_infos_data.page_cached_next = 0u;
pages_infos_data.page_cached_start = 0u;
pages_infos_data.page_cached_end = 0u;
pages_infos_data.view_count = 0u;
pages_infos_data.page_size = 256u;
pages_infos_data.push_update();
}
@@ -791,44 +836,55 @@ static void test_eevee_shadow_finalize()
GPU_TEXTURE_USAGE_SHADER_WRITE);
tilemap_tx.clear(uint4(0));
Texture render_map_tx = {"ShadowRenderMap",
GPU_R32UI,
GPU_TEXTURE_USAGE_HOST_READ | GPU_TEXTURE_USAGE_SHADER_READ |
GPU_TEXTURE_USAGE_SHADER_WRITE | GPU_TEXTURE_USAGE_MIP_SWIZZLE_VIEW,
int2(SHADOW_TILEMAP_RES),
1, /* Only one layer for the test. */
nullptr,
SHADOW_TILEMAP_LOD + 1};
render_map_tx.ensure_mip_views();
View shadow_multi_view = {"ShadowMultiView", 64, true};
StorageArrayBuffer<ViewMatrices, DRW_VIEW_MAX> shadow_multi_view_buf = {"ShadowMultiView"};
StorageBuffer<DispatchCommand> clear_dispatch_buf;
StorageArrayBuffer<uint, SHADOW_MAX_PAGE> clear_page_buf = {"clear_page_buf"};
StorageArrayBuffer<uint, SHADOW_MAX_PAGE> clear_list_buf = {"clear_list_buf"};
StorageArrayBuffer<uint, SHADOW_RENDER_MAP_SIZE> render_map_buf = {"render_map_buf"};
StorageArrayBuffer<uint, SHADOW_VIEW_MAX> viewport_index_buf = {"viewport_index_buf"};
render_map_buf.clear_to_zero();
GPUShader *sh = GPU_shader_create_from_info_name("eevee_shadow_tilemap_finalize");
PassSimple pass("Test");
pass.shader_set(sh);
pass.bind_ssbo("tilemaps_buf", tilemaps_data);
pass.bind_ssbo("tilemaps_clip_buf", tilemaps_clip);
pass.bind_ssbo("tiles_buf", tiles_data);
pass.bind_ssbo("view_infos_buf", shadow_multi_view_buf);
pass.bind_ssbo("statistics_buf", statistics_buf);
pass.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf);
pass.bind_ssbo("clear_list_buf", clear_list_buf);
pass.bind_ssbo("render_map_buf", render_map_buf);
pass.bind_ssbo("viewport_index_buf", viewport_index_buf);
pass.bind_ssbo("pages_infos_buf", pages_infos_data);
pass.bind_image("tilemaps_img", tilemap_tx);
pass.bind_ssbo("view_infos_buf", shadow_multi_view.matrices_ubo_get());
pass.bind_ssbo("clear_dispatch_buf", clear_dispatch_buf);
pass.bind_ssbo("clear_page_buf", clear_page_buf);
pass.bind_ssbo("statistics_buf", statistics_buf);
pass.bind_ssbo("tilemaps_clip_buf", tilemaps_clip);
pass.bind_image("render_map_lod0_img", render_map_tx.mip_view(0));
pass.bind_image("render_map_lod1_img", render_map_tx.mip_view(1));
pass.bind_image("render_map_lod2_img", render_map_tx.mip_view(2));
pass.bind_image("render_map_lod3_img", render_map_tx.mip_view(3));
pass.bind_image("render_map_lod4_img", render_map_tx.mip_view(4));
pass.bind_image("render_map_lod5_img", render_map_tx.mip_view(5));
pass.dispatch(int3(1, 1, tilemaps_data.size()));
pass.barrier(GPU_BARRIER_BUFFER_UPDATE | GPU_BARRIER_TEXTURE_UPDATE);
Manager manager;
manager.submit(pass);
GPU_memory_barrier(GPU_BARRIER_BUFFER_UPDATE | GPU_BARRIER_TEXTURE_UPDATE);
{
/* Check output views. */
shadow_multi_view_buf.read();
for (auto i : IndexRange(5)) {
EXPECT_EQ(shadow_multi_view_buf[i].viewmat, float4x4::identity());
EXPECT_EQ(shadow_multi_view_buf[i].viewinv, float4x4::identity());
}
EXPECT_EQ(shadow_multi_view_buf[0].winmat,
math::projection::perspective(-1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 10.0f));
EXPECT_EQ(shadow_multi_view_buf[1].winmat,
math::projection::perspective(-1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 10.0f));
EXPECT_EQ(shadow_multi_view_buf[2].winmat,
math::projection::perspective(-1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 10.0f));
EXPECT_EQ(shadow_multi_view_buf[3].winmat,
math::projection::perspective(-1.0f, -0.75f, -1.0f, -0.75f, 1.0f, 10.0f));
EXPECT_EQ(shadow_multi_view_buf[4].winmat,
math::projection::perspective(0.5f, 1.5f, -1.0f, 0.0f, 1.0f, 10.0f));
}
{
uint *pixels = tilemap_tx.read<uint32_t>(GPU_DATA_UINT);
@@ -836,7 +892,8 @@ static void test_eevee_shadow_finalize()
std::string result = "";
for (auto y : IndexRange(SHADOW_TILEMAP_RES)) {
for (auto x : IndexRange(SHADOW_TILEMAP_RES)) {
result += std::to_string(shadow_tile_unpack(pixels[y * SHADOW_TILEMAP_RES + x]).page.x);
ShadowTileData tile = shadow_tile_unpack(pixels[y * SHADOW_TILEMAP_RES + x]);
result += std::to_string(tile.page.x + tile.page.y * SHADOW_PAGE_PER_ROW);
}
}
@@ -844,7 +901,7 @@ static void test_eevee_shadow_finalize()
/** The layout of these expected strings is Y down. */
StringRefNull expected_pages =
"12334444755555556666666666666666"
"12334444555555556666666666666667"
"22334444555555556666666666666666"
"33334444555555556666666666666666"
"33334444555555556666666666666666"
@@ -859,15 +916,15 @@ static void test_eevee_shadow_finalize()
"55555555555555556666666666666666"
"55555555555555556666666666666666"
"55555555555555556666666666666666"
"55555555555555556666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"55555555555555556666666696666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"88888888666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
"66666666666666666666666666666666"
@@ -881,17 +938,17 @@ static void test_eevee_shadow_finalize()
}
{
auto stringify_lod = [](Span<uint> data) -> std::string {
auto stringify_view = [](Span<uint> data) -> std::string {
std::string result = "";
for (auto x : data) {
result += (x == 0xFFFFFFFFu) ? '-' : '0' + (x % 10);
result += (x == 0u) ? '-' : ((x == 0xFFFFFFFFu) ? 'x' : '0' + (x % 10));
}
return result;
};
/** The layout of these expected strings is Y down. */
StringRefNull expected_lod0 =
"--------7-----------------------"
StringRefNull expected_view0 =
"6-------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
@@ -924,71 +981,166 @@ static void test_eevee_shadow_finalize()
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod1 =
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------"
"----------------";
StringRefNull expected_view1 =
"5-------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod2 =
"3-------"
"--------"
"--------"
"--------"
"--------"
"--------"
"--------"
"--------";
StringRefNull expected_view2 =
"4xxx----------------------------"
"xxxx----------------------------"
"8xxx----------------------------"
"xxxx----------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod3 =
"----"
"----"
"----"
"----";
StringRefNull expected_view3 =
"3-------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod4 =
"5-"
"--";
StringRefNull expected_view4 =
"xxxxxxx7xxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"xxxxxxxxxxxxxxxx----------------"
"9xxxxxxxxxxxxxxx----------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------"
"--------------------------------";
StringRefNull expected_lod5 = "6";
render_map_buf.read();
uint *pixels_lod0 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 0);
uint *pixels_lod1 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 1);
uint *pixels_lod2 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 2);
uint *pixels_lod3 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 3);
uint *pixels_lod4 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 4);
uint *pixels_lod5 = render_map_tx.read<uint32_t>(GPU_DATA_UINT, 5);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod0, lod0_len)), expected_lod0);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod1, lod1_len)), expected_lod1);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod2, lod2_len)), expected_lod2);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod3, lod3_len)), expected_lod3);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod4, lod4_len)), expected_lod4);
EXPECT_EQ(stringify_lod(Span<uint>(pixels_lod5, 1)), expected_lod5);
MEM_SAFE_FREE(pixels_lod0);
MEM_SAFE_FREE(pixels_lod1);
MEM_SAFE_FREE(pixels_lod2);
MEM_SAFE_FREE(pixels_lod3);
MEM_SAFE_FREE(pixels_lod4);
MEM_SAFE_FREE(pixels_lod5);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 0],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view0);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 1],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view1);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 2],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view2);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 3],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view3);
EXPECT_EQ(stringify_view(Span<uint>(&render_map_buf[SHADOW_TILEMAP_LOD0_LEN * 4],
SHADOW_TILEMAP_LOD0_LEN)),
expected_view4);
}
pages_infos_data.read();
EXPECT_EQ(pages_infos_data.page_free_count, 0);
EXPECT_EQ(pages_infos_data.view_count, 1);
statistics_buf.read();
EXPECT_EQ(statistics_buf.view_needed_count, 5);
GPU_shader_free(sh);
DRW_shaders_free();