From dda4c0721cba2d7ea243ad20f0018e1b7288c719 Mon Sep 17 00:00:00 2001 From: Jason Fielder Date: Thu, 20 Apr 2023 08:03:31 +0200 Subject: [PATCH] EEVEE-Next: Resolve compilation errors in Metal Shader source requires explicit conversions and shader address space qualifiers in certain places in order to compile for Metal. We also require constructors for a number of default struct types. Authored by Apple: Michael Parkin-White Pull Request: https://projects.blender.org/blender/blender/pulls/106219 --- .../eevee_depth_of_field_accumulator_lib.glsl | 2 +- .../shaders/eevee_hiz_update_comp.glsl | 6 +- .../eevee_light_culling_tile_comp.glsl | 4 +- .../eevee_motion_blur_dilate_comp.glsl | 21 ++++-- .../eevee_motion_blur_gather_comp.glsl | 4 +- .../shaders/eevee_nodetree_lib.glsl | 4 +- .../shaders/eevee_shadow_tag_update_comp.glsl | 4 +- .../shaders/eevee_shadow_tag_usage_lib.glsl | 2 +- .../workbench_shadow_visibility_comp.glsl | 2 +- .../blender/draw/intern/draw_shader_shared.h | 22 +++++- source/blender/draw/intern/draw_view.cc | 20 +++--- .../draw/intern/shaders/common_aabb_lib.glsl | 8 +++ .../intern/shaders/common_intersect_lib.glsl | 17 +++-- .../draw/intern/shaders/common_shape_lib.glsl | 18 ++++- .../shaders/draw_view_finalize_comp.glsl | 72 +++++++++---------- .../intern/shaders/draw_visibility_comp.glsl | 5 +- .../gpu/shaders/metal/mtl_shader_defines.msl | 59 +++++++++++++-- .../shaders/opengl/glsl_shader_defines.glsl | 6 ++ 18 files changed, 191 insertions(+), 85 deletions(-) diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl index 1da741d7609..957c9b01a2a 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_accumulator_lib.glsl @@ -590,7 +590,7 @@ void dof_gather_accumulator(sampler2D color_tx, *
The full pixel neighborhood is gathered. * \{ */ -void dof_slight_focus_gather(sampler2D depth_tx, +void dof_slight_focus_gather(depth2D depth_tx, sampler2D color_tx, sampler2D bkh_lut_tx, /* Renamed because of ugly macro job. */ float radius, diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl index 479a6b590b0..cea25ef7ce0 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl @@ -62,7 +62,7 @@ void main() int mask_shift = 1; #define downsample_level(out_mip__, lod_) \ - active_thread = all(lessThan(local_px, gl_WorkGroupSize.xy >> uint(mask_shift))); \ + active_thread = all(lessThan(uvec2(local_px), gl_WorkGroupSize.xy >> uint(mask_shift))); \ barrier(); /* Wait for previous writes to finish. */ \ if (active_thread) { \ max_depth = max_v4(load_local_depths(local_px)); \ @@ -89,12 +89,12 @@ void main() } finished_tile_counter = 0u; - ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize * 2u)); + ivec2 iter = divide_ceil(imageSize(out_mip_5), ivec2(gl_WorkGroupSize.xy * 2u)); ivec2 image_border = imageSize(out_mip_5) - 1; for (int y = 0; y < iter.y; y++) { for (int x = 0; x < iter.x; x++) { /* Load result of the other work groups. 
*/ - kernel_origin = ivec2(gl_WorkGroupSize) * ivec2(x, y); + kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y); src_px = ivec2(kernel_origin + local_px) * 2; vec4 samp; samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl index 37705e22b22..1f012a44acf 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_culling_tile_comp.glsl @@ -168,13 +168,15 @@ void main() } /* Fallthrough to the hemispheric case. */ case LIGHT_RECT: - case LIGHT_ELLIPSE: + case LIGHT_ELLIPSE: { vec3 v000 = vP - v_right * radius - v_up * radius; vec3 v100 = v000 + v_right * (radius * 2.0); vec3 v010 = v000 + v_up * (radius * 2.0); vec3 v001 = v000 - v_back * radius; Box bbox = shape_box(v000, v100, v010, v001); intersect_tile = intersect_tile && intersect(tile, bbox); + break; + } default: break; } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl index 07139ea6a09..e365da53d2b 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_dilate_comp.glsl @@ -74,8 +74,10 @@ void main() vec4 max_motion = imageLoad(in_tiles_img, src_tile); - MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, src_tile); - MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); + MotionPayload payload_prv = motion_blur_tile_indirection_pack_payload(max_motion.xy, + uvec2(src_tile)); + MotionPayload payload_nxt = motion_blur_tile_indirection_pack_payload(max_motion.zw, + uvec2(src_tile)); if (true) { /* 
Rectangular area (in tiles) where the motion vector spreads. */ MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy); @@ -85,17 +87,20 @@ void main() for (int y = 0; y < motion_rect.extent.y; y++) { ivec2 tile = motion_rect.bottom_left + ivec2(x, y); if (is_inside_motion_line(tile, motion_line)) { - motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); + motion_blur_tile_indirection_store( + tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv); /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in * the motion next so that weighting in gather pass is better. */ - motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); + motion_blur_tile_indirection_store( + tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt); } } } } if (true) { - MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, src_tile); + MotionPayload payload = motion_blur_tile_indirection_pack_payload(max_motion.zw, + uvec2(src_tile)); /* Rectangular area (in tiles) where the motion vector spreads. */ MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw); MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw); @@ -104,10 +109,12 @@ void main() for (int y = 0; y < motion_rect.extent.y; y++) { ivec2 tile = motion_rect.bottom_left + ivec2(x, y); if (is_inside_motion_line(tile, motion_line)) { - motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_NEXT, tile, payload_nxt); + motion_blur_tile_indirection_store( + tile_indirection_buf, MOTION_NEXT, uvec2(tile), payload_nxt); /* FIXME: This is a bit weird, but for some reason, we need the store the same vector in * the motion next so that weighting in gather pass is better. 
*/ - motion_blur_tile_indirection_store(tile_indirection_buf, MOTION_PREV, tile, payload_prv); + motion_blur_tile_indirection_store( + tile_indirection_buf, MOTION_PREV, uvec2(tile), payload_prv); } } } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl index 5249e6637b6..1408f28e585 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_motion_blur_gather_comp.glsl @@ -178,10 +178,10 @@ void main() vec4 max_motion; /* Load dilation result from the indirection table. */ ivec2 tile_prev; - motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, tile, tile_prev); + motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_PREV, uvec2(tile), tile_prev); max_motion.xy = imageLoad(in_tiles_img, tile_prev).xy; ivec2 tile_next; - motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, tile, tile_next); + motion_blur_tile_indirection_load(tile_indirection_buf, MOTION_NEXT, uvec2(tile), tile_next); max_motion.zw = imageLoad(in_tiles_img, tile_next).zw; Accumulator accum; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl index db38baab6a4..6d802a6d79a 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_nodetree_lib.glsl @@ -242,13 +242,13 @@ void output_aov(vec4 color, float value, uint hash) #if defined(MAT_AOV_SUPPORT) && defined(GPU_FRAGMENT_SHADER) for (int i = 0; i < AOV_MAX && i < aov_buf.color_len; i++) { if (aov_buf.hash_color[i] == hash) { - imageStore(aov_color_img, ivec3(gl_FragCoord.xy, i), color); + imageStore(aov_color_img, ivec3(ivec2(gl_FragCoord.xy), i), color); return; } } for (int i = 0; i < AOV_MAX && i < 
aov_buf.value_len; i++) { if (aov_buf.hash_value[i] == hash) { - imageStore(aov_value_img, ivec3(gl_FragCoord.xy, i), vec4(value)); + imageStore(aov_value_img, ivec3(ivec2(gl_FragCoord.xy), i), vec4(value)); return; } } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl index 475d456db7a..9f9a4c88f9c 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_update_comp.glsl @@ -65,7 +65,7 @@ void main() } AABB aabb_tag; - AABB aabb_map = AABB(vec3(-0.99999), vec3(0.99999)); + AABB aabb_map = shape_aabb(vec3(-0.99999), vec3(0.99999)); /* Directionnal winmat have no correct near/far in the Z dimension at this point. * Do not clip in this dimension. */ @@ -87,7 +87,7 @@ void main() for (int y = box_min.y; y <= box_max.y; y++) { for (int x = box_min.x; x <= box_max.x; x++) { int tile_index = shadow_tile_offset(ivec2(x, y), tilemap.tiles_index, lod); - atomicOr(tiles_buf[tile_index], SHADOW_DO_UPDATE); + atomicOr(tiles_buf[tile_index], uint(SHADOW_DO_UPDATE)); } } } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl index bb18f56ec74..172fe9488f4 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_shadow_tag_usage_lib.glsl @@ -21,7 +21,7 @@ void shadow_tag_usage_tile(LightData light, ivec2 tile_co, int lod, int tilemap_ tile_co >>= lod; int tile_index = shadow_tile_offset(tile_co, tilemaps_buf[tilemap_index].tiles_index, lod); - atomicOr(tiles_buf[tile_index], SHADOW_IS_USED); + atomicOr(tiles_buf[tile_index], uint(SHADOW_IS_USED)); } void shadow_tag_usage_tilemap_directional(uint l_idx, vec3 P, vec3 V, float radius) diff 
--git a/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl b/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl index aef73672a8a..346e10d7083 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_shadow_visibility_comp.glsl @@ -44,7 +44,7 @@ bool is_visible(IsectBox box) bool intersects_near_plane(IsectBox box) { - vec4 near_plane = drw_view_culling.planes[4]; + vec4 near_plane = drw_view_culling.frustum_planes.planes[4]; bool on_positive_side = false; bool on_negative_side = false; diff --git a/source/blender/draw/intern/draw_shader_shared.h b/source/blender/draw/intern/draw_shader_shared.h index 28090ef2b46..3ad1e11df28 100644 --- a/source/blender/draw/intern/draw_shader_shared.h +++ b/source/blender/draw/intern/draw_shader_shared.h @@ -21,6 +21,8 @@ typedef struct DispatchCommand DispatchCommand; typedef struct DRWDebugPrintBuffer DRWDebugPrintBuffer; typedef struct DRWDebugVert DRWDebugVert; typedef struct DRWDebugDrawBuffer DRWDebugDrawBuffer; +typedef struct FrustumCorners FrustumCorners; +typedef struct FrustumPlanes FrustumPlanes; /* __cplusplus is true when compiling with MSL. */ # if defined(__cplusplus) && !defined(GPU_SHADER) @@ -94,11 +96,27 @@ uint drw_view_id = 0; # define DRW_VIEW_FROM_RESOURCE_ID drw_view_id = (drw_ResourceID & DRW_VIEW_MASK) #endif +struct FrustumCorners { + float4 corners[8]; +}; +BLI_STATIC_ASSERT_ALIGN(FrustumCorners, 16) + +struct FrustumPlanes { + /* [0] left + * [1] right + * [2] bottom + * [3] top + * [4] near + * [5] far */ + float4 planes[6]; +}; +BLI_STATIC_ASSERT_ALIGN(FrustumPlanes, 16) + struct ViewCullingData { /** \note vec3 array padded to vec4. */ /** Frustum corners. 
*/ - float4 corners[8]; - float4 planes[6]; + FrustumCorners frustum_corners; + FrustumPlanes frustum_planes; float4 bound_sphere; }; BLI_STATIC_ASSERT_ALIGN(ViewCullingData, 16) diff --git a/source/blender/draw/intern/draw_view.cc b/source/blender/draw/intern/draw_view.cc index 30417ff6420..c6b7ac11017 100644 --- a/source/blender/draw/intern/draw_view.cc +++ b/source/blender/draw/intern/draw_view.cc @@ -50,7 +50,8 @@ void View::frustum_boundbox_calc(int view_id) } #endif - MutableSpan<float4> corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)}; + MutableSpan<float4> corners = {culling_[view_id].frustum_corners.corners, + ARRAY_SIZE(culling_[view_id].frustum_corners.corners)}; float left, right, bottom, top, near, far; bool is_persp = data_[view_id].winmat[3][3] == 0.0f; @@ -89,15 +90,15 @@ void View::frustum_culling_planes_calc(int view_id) { float4x4 persmat = data_[view_id].winmat * data_[view_id].viewmat; planes_from_projmat(persmat.ptr(), - culling_[view_id].planes[0], - culling_[view_id].planes[5], - culling_[view_id].planes[1], - culling_[view_id].planes[3], - culling_[view_id].planes[4], - culling_[view_id].planes[2]); + culling_[view_id].frustum_planes.planes[0], + culling_[view_id].frustum_planes.planes[5], + culling_[view_id].frustum_planes.planes[1], + culling_[view_id].frustum_planes.planes[3], + culling_[view_id].frustum_planes.planes[4], + culling_[view_id].frustum_planes.planes[2]); /* Normalize.
*/ - for (float4 &plane : culling_[view_id].planes) { + for (float4 &plane : culling_[view_id].frustum_planes.planes) { plane.w /= normalize_v3(plane); } } @@ -105,7 +106,8 @@ void View::frustum_culling_planes_calc(int view_id) void View::frustum_culling_sphere_calc(int view_id) { BoundSphere &bsphere = *reinterpret_cast<BoundSphere *>(&culling_[view_id].bound_sphere); - Span<float4> corners = {culling_[view_id].corners, ARRAY_SIZE(culling_[view_id].corners)}; + Span<float4> corners = {culling_[view_id].frustum_corners.corners, + ARRAY_SIZE(culling_[view_id].frustum_corners.corners)}; /* Extract Bounding Sphere */ if (data_[view_id].winmat[3][3] != 0.0f) { diff --git a/source/blender/draw/intern/shaders/common_aabb_lib.glsl b/source/blender/draw/intern/shaders/common_aabb_lib.glsl index b5f664a6779..5adcdec4a3e 100644 --- a/source/blender/draw/intern/shaders/common_aabb_lib.glsl +++ b/source/blender/draw/intern/shaders/common_aabb_lib.glsl @@ -9,6 +9,14 @@ struct AABB { vec3 min, max; }; +AABB shape_aabb(vec3 min, vec3 max) +{ + AABB aabb; + aabb.min = min; + aabb.max = max; + return aabb; +} + AABB aabb_init_min_max() { AABB aabb; diff --git a/source/blender/draw/intern/shaders/common_intersect_lib.glsl b/source/blender/draw/intern/shaders/common_intersect_lib.glsl index e23216ec2e2..252298022e3 100644 --- a/source/blender/draw/intern/shaders/common_intersect_lib.glsl +++ b/source/blender/draw/intern/shaders/common_intersect_lib.glsl @@ -136,7 +136,7 @@ bool intersect_view(Pyramid pyramid) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 5; ++v) { - float test = dot(drw_view_culling.planes[p], vec4(pyramid.corners[v], 1.0)); + float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(pyramid.corners[v], 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -158,7 +158,8 @@ bool intersect_view(Pyramid pyramid) for (int p = 0; p < 5; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float
test = dot(i_pyramid.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); + float test = dot(i_pyramid.planes[p], + vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -181,7 +182,7 @@ bool intersect_view(Box box) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(drw_view_culling.planes[p], vec4(box.corners[v], 1.0)); + float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(box.corners[v], 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -203,7 +204,8 @@ bool intersect_view(Box box) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); + float test = dot(i_box.planes[p], + vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -227,7 +229,7 @@ bool intersect_view(IsectBox i_box) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(drw_view_culling.planes[p], vec4(i_box.corners[v], 1.0)); + float test = dot(drw_view_culling.frustum_planes.planes[p], vec4(i_box.corners[v], 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -247,7 +249,8 @@ bool intersect_view(IsectBox i_box) for (int p = 0; p < 6; ++p) { bool is_any_vertex_on_positive_side = false; for (int v = 0; v < 8; ++v) { - float test = dot(i_box.planes[p], vec4(drw_view_culling.corners[v].xyz, 1.0)); + float test = dot(i_box.planes[p], + vec4(drw_view_culling.frustum_corners.corners[v].xyz, 1.0)); if (test > 0.0) { is_any_vertex_on_positive_side = true; break; @@ -268,7 +271,7 @@ bool intersect_view(Sphere sphere) bool intersects = true; for (int p = 0; p < 6 && intersects; ++p) { - float dist_to_plane = 
dot(drw_view_culling.planes[p], vec4(sphere.center, 1.0)); + float dist_to_plane = dot(drw_view_culling.frustum_planes.planes[p], vec4(sphere.center, 1.0)); if (dist_to_plane < -sphere.radius) { intersects = false; } diff --git a/source/blender/draw/intern/shaders/common_shape_lib.glsl b/source/blender/draw/intern/shaders/common_shape_lib.glsl index 56722c417aa..25a2781d729 100644 --- a/source/blender/draw/intern/shaders/common_shape_lib.glsl +++ b/source/blender/draw/intern/shaders/common_shape_lib.glsl @@ -18,7 +18,10 @@ struct Circle { Circle shape_circle(vec2 center, float radius) { - return Circle(center, radius); + Circle circle; + circle.center = center; + circle.radius = radius; + return circle; } /** \} */ @@ -34,7 +37,10 @@ struct Sphere { Sphere shape_sphere(vec3 center, float radius) { - return Sphere(center, radius); + Sphere sphere; + sphere.center = center; + sphere.radius = radius; + return sphere; } /** \} */ @@ -192,6 +198,14 @@ Frustum shape_frustum(vec3 corners[8]) struct Cone { vec3 direction; float angle_cos; + +#ifdef GPU_METAL + inline Cone() = default; + inline Cone(vec3 in_direction, float in_angle_cos) + : direction(in_direction), angle_cos(in_angle_cos) + { + } +#endif }; Cone shape_cone(vec3 direction, float angle_cosine) diff --git a/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl b/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl index f3af010a47c..6fc34af815d 100644 --- a/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl +++ b/source/blender/draw/intern/shaders/draw_view_finalize_comp.glsl @@ -33,18 +33,19 @@ void projmat_dimensions(mat4 winmat, } } -void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8]) +void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out FrustumCorners frustum_corners) { float left, right, bottom, top, near, far; bool is_persp = winmat[3][3] == 0.0; projmat_dimensions(winmat, left, right, bottom, top, near, far); - corners[0][2] = corners[3][2] 
= corners[7][2] = corners[4][2] = -near; - corners[0][0] = corners[3][0] = left; - corners[4][0] = corners[7][0] = right; - corners[0][1] = corners[4][1] = bottom; - corners[7][1] = corners[3][1] = top; + frustum_corners.corners[0][2] = frustum_corners.corners[3][2] = frustum_corners.corners[7][2] = + frustum_corners.corners[4][2] = -near; + frustum_corners.corners[0][0] = frustum_corners.corners[3][0] = left; + frustum_corners.corners[4][0] = frustum_corners.corners[7][0] = right; + frustum_corners.corners[0][1] = frustum_corners.corners[4][1] = bottom; + frustum_corners.corners[7][1] = frustum_corners.corners[3][1] = top; /* Get the coordinates of the far plane. */ if (is_persp) { @@ -55,25 +56,20 @@ void frustum_boundbox_calc(mat4 winmat, mat4 viewinv, out vec4 corners[8]) top *= sca_far; } - corners[1][2] = corners[2][2] = corners[6][2] = corners[5][2] = -far; - corners[1][0] = corners[2][0] = left; - corners[6][0] = corners[5][0] = right; - corners[1][1] = corners[5][1] = bottom; - corners[2][1] = corners[6][1] = top; + frustum_corners.corners[1][2] = frustum_corners.corners[2][2] = frustum_corners.corners[6][2] = + frustum_corners.corners[5][2] = -far; + frustum_corners.corners[1][0] = frustum_corners.corners[2][0] = left; + frustum_corners.corners[6][0] = frustum_corners.corners[5][0] = right; + frustum_corners.corners[1][1] = frustum_corners.corners[5][1] = bottom; + frustum_corners.corners[2][1] = frustum_corners.corners[6][1] = top; /* Transform into world space. 
*/ for (int i = 0; i < 8; i++) { - corners[i].xyz = transform_point(viewinv, corners[i].xyz); + frustum_corners.corners[i].xyz = transform_point(viewinv, frustum_corners.corners[i].xyz); } } -void planes_from_projmat(mat4 mat, - out vec4 left, - out vec4 right, - out vec4 bottom, - out vec4 top, - out vec4 near, - out vec4 far) +void planes_from_projmat(mat4 mat, out FrustumPlanes frustum_planes) { /* References: * @@ -81,35 +77,35 @@ void planes_from_projmat(mat4 mat, * http://www8.cs.umu.se/kurser/5DV051/HT12/lab/plane_extraction.pdf */ mat = transpose(mat); - left = mat[3] + mat[0]; - right = mat[3] - mat[0]; - bottom = mat[3] + mat[1]; - top = mat[3] - mat[1]; - near = mat[3] + mat[2]; - far = mat[3] - mat[2]; + frustum_planes.planes[0] = mat[3] + mat[0]; + frustum_planes.planes[1] = mat[3] - mat[0]; + frustum_planes.planes[2] = mat[3] + mat[1]; + frustum_planes.planes[3] = mat[3] - mat[1]; + frustum_planes.planes[4] = mat[3] + mat[2]; + frustum_planes.planes[5] = mat[3] - mat[2]; } -void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out vec4 planes[6]) +void frustum_culling_planes_calc(mat4 winmat, mat4 viewmat, out FrustumPlanes frustum_planes) { mat4 persmat = winmat * viewmat; - planes_from_projmat(persmat, planes[0], planes[5], planes[1], planes[3], planes[4], planes[2]); + planes_from_projmat(persmat, frustum_planes); /* Normalize. */ for (int p = 0; p < 6; p++) { - planes[p] /= length(planes[p].xyz); + frustum_planes.planes[p] /= length(frustum_planes.planes[p].xyz); } } -vec4 frustum_culling_sphere_calc(vec4 corners[8]) +vec4 frustum_culling_sphere_calc(FrustumCorners frustum_corners) { /* Extract Bounding Sphere */ /* TODO(fclem): This is significantly less precise than CPU, but it isn't used in most cases. 
*/ vec4 bsphere; - bsphere.xyz = (corners[0].xyz + corners[6].xyz) * 0.5; + bsphere.xyz = (frustum_corners.corners[0].xyz + frustum_corners.corners[6].xyz) * 0.5; bsphere.w = 0.0; for (int i = 0; i < 8; i++) { - bsphere.w = max(bsphere.w, distance(bsphere.xyz, corners[i].xyz)); + bsphere.w = max(bsphere.w, distance(bsphere.xyz, frustum_corners.corners[i].xyz)); } return bsphere; } @@ -125,11 +121,15 @@ void main() return; } - frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, view_culling_buf[drw_view_id].corners); + /* Read frustum_corners from device memory, update, and write back. */ + FrustumCorners frustum_corners = view_culling_buf[drw_view_id].frustum_corners; + frustum_boundbox_calc(drw_view.winmat, drw_view.viewinv, frustum_corners); + view_culling_buf[drw_view_id].frustum_corners = frustum_corners; - frustum_culling_planes_calc( - drw_view.winmat, drw_view.viewmat, view_culling_buf[drw_view_id].planes); + /* Read frustum_planes from device memory, update, and write back.
*/ + FrustumPlanes frustum_planes = view_culling_buf[drw_view_id].frustum_planes; + frustum_culling_planes_calc(drw_view.winmat, drw_view.viewmat, frustum_planes); - view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc( - view_culling_buf[drw_view_id].corners); + view_culling_buf[drw_view_id].frustum_planes = frustum_planes; + view_culling_buf[drw_view_id].bound_sphere = frustum_culling_sphere_calc(frustum_corners); } diff --git a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl index f3ca51dbf6b..0d2717aea64 100644 --- a/source/blender/draw/intern/shaders/draw_visibility_comp.glsl +++ b/source/blender/draw/intern/shaders/draw_visibility_comp.glsl @@ -34,8 +34,9 @@ void main() bounds.bounding_corners[1].xyz, bounds.bounding_corners[2].xyz, bounds.bounding_corners[3].xyz); - Sphere bounding_sphere = Sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); - Sphere inscribed_sphere = Sphere(bounds.bounding_sphere.xyz, bounds._inner_sphere_radius); + Sphere bounding_sphere = shape_sphere(bounds.bounding_sphere.xyz, bounds.bounding_sphere.w); + Sphere inscribed_sphere = shape_sphere(bounds.bounding_sphere.xyz, + bounds._inner_sphere_radius); for (drw_view_id = 0; drw_view_id < view_len; drw_view_id++) { if (drw_view_culling.bound_sphere.w == -1.0) { diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl index a192e51a0ec..dfb74a3e76b 100644 --- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl +++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl @@ -101,10 +101,18 @@ struct constexp_uvec3 { return 0; } } - inline operator uint3() const + constexpr inline operator uint3() const { return xyz; } + constexpr inline operator uint2() const + { + return xy; + } + constexpr inline operator uint() const + { + return x; + } }; constexpr constexp_uvec3 __internal_workgroupsize_get() @@ -140,6 
+148,10 @@ template<typename T> T atomicSub(threadgroup T &mem, T data) { return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); } +template<typename T> T atomicAnd(threadgroup T &mem, T data) +{ + return atomic_fetch_and_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); +} template<typename T> T atomicOr(threadgroup T &mem, T data) { return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); @@ -152,29 +164,41 @@ template<typename T> T atomicXor(threadgroup T &mem, T data) /* Device memory. */ template<typename T> T atomicMax(device T &mem, T data) { - return atomic_fetch_max_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); + return atomic_fetch_max_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed); } template<typename T> T atomicMin(device T &mem, T data) { - return atomic_fetch_min_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); + return atomic_fetch_min_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed); } template<typename T> T atomicAdd(device T &mem, T data) { - return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); + return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed); } template<typename T> T atomicSub(device T &mem, T data) { - return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); + return atomic_fetch_sub_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed); +} +template<typename T> T atomicAnd(device T &mem, T data) +{ + return atomic_fetch_and_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed); } template<typename T> T atomicOr(device T &mem, T data) { - return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); + return atomic_fetch_or_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed); } template<typename T> T atomicXor(device T &mem, T data) { - return atomic_fetch_xor_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); + return atomic_fetch_xor_explicit((device
_atomic<T> *)&mem, data, memory_order_relaxed); } +/* Unblock texture atomic compilation. + * TODO(Metal): This is not correct for global atomic behaviour, but will be safe within a single thread. + * We need to re-visit the solution for this use-case and use a 2D texture buffer instead. */ +#define imageAtomicMin(tex, coord, data) \ + uint val = _texelFetch_internal(tex, coord, 0).r;\ + _texture_write_internal(tex, coord, uint4((val < data) ? val : data));\ + tex.texture->fence(); + /* Used to replace 'out' in function parameters with threadlocal reference * shortened to avoid expanding the glsl source string. */ #define THD thread @@ -1126,6 +1150,27 @@ inline float4 uintBitsToFloat(uint4 f) return as_type<float4>(f); } +#define bitfieldReverse reverse_bits +#define bitfieldExtract extract_bits +#define bitfieldInsert insert_bits +#define bitCount popcount + +template<typename T> T findLSB(T x) +{ + /* ctz returns the number of trailing zeroes. To fetch the index of the LSB, we can also use this + * value as index, however need to filter out the case where the input value is zero to match + * GLSL functionality. */ + return (x == T(0)) ? T(-1) : T(ctz(x)); +} + +template<typename T> T findMSB(T x) +{ + /* clz returns the number of leading zeroes. To fetch the index of the MSB, we subtract this + * count from clz(0) (the bit width of T) and offset by 1, however need to filter out the case + * where the input value is zero to match GLSL functionality. */ + return (x == T(0)) ? T(-1) : (clz(T(0)) - clz(x) - T(1)); +} + /* Texture size functions. Add texture types as needed.
*/ #define imageSize(image) textureSize(image, 0) diff --git a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl index f2d972ea574..eb09f580391 100644 --- a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl +++ b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl @@ -15,6 +15,12 @@ #define depthCubeArray samplerCubeArray #define depth2DArrayShadow sampler2DArrayShadow +/* Memory scope and pass by reference types. + * NOTE: These are required by Metal, but are not required in all cases by GLSL. */ +#define device +#define threadgroup +#define OUT(type, name, array_len) out type name[array_len] + /* Backend Functions. */ #define select(A, B, mask) mix(A, B, mask)