From 9c0321ae9b4be72a9f5e572471f2814ac2f6dfef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Mon, 7 Oct 2024 12:54:10 +0200 Subject: [PATCH] Metal: Simplify MSL translation Move most of the string preprocessing used for MSL compatibility to `glsl_preprocess`. Enforce some changes in the GLSL codebase, such as matrix and array constructor syntax, for C++ compatibility. Additionally, reduce the amount of code duplication inside the compatibility code. Pull Request: https://projects.blender.org/blender/blender/pulls/128634 --- .../compositor_deriche_gaussian_blur.glsl | 4 +- .../shaders/compositor_kuwahara_classic.glsl | 5 +- .../shaders/compositor_plane_deform.glsl | 2 +- .../shaders/compositor_plane_deform_mask.glsl | 2 +- .../compositor_plane_deform_motion_blur.glsl | 2 +- ...positor_plane_deform_motion_blur_mask.glsl | 2 +- .../shaders/compositor_realize_on_domain.glsl | 2 +- .../compositor_van_vliet_gaussian_blur.glsl | 4 +- .../shaders/eevee_depth_of_field_lib.glsl | 4 +- .../eevee_depth_of_field_resolve_comp.glsl | 2 +- .../eevee_depth_of_field_stabilize_comp.glsl | 8 +- .../eevee_display_lightprobe_planar_vert.glsl | 12 +- .../eevee_display_lightprobe_sphere_vert.glsl | 12 +- .../eevee_display_lightprobe_volume_vert.glsl | 12 +- .../eevee_next/shaders/eevee_film_lib.glsl | 16 +- .../eevee_light_shadow_setup_comp.glsl | 14 +- .../shaders/eevee_lightprobe_eval_lib.glsl | 2 +- .../eevee_lightprobe_volume_eval_lib.glsl | 2 +- .../eevee_lightprobe_volume_load_comp.glsl | 3 +- .../overlay_armature_envelope_solid_vert.glsl | 2 +- .../overlay_armature_shape_outline_vert.glsl | 2 +- ...y_armature_shape_outline_vert_no_geom.glsl | 2 +- .../overlay_armature_shape_solid_vert.glsl | 2 +- .../overlay_armature_sphere_outline_vert.glsl | 2 +- .../overlay_armature_sphere_solid_vert.glsl | 2 +- .../shaders/overlay_background_frag.glsl | 8 +- .../overlay_edit_mesh_skin_root_vert.glsl | 2 +- .../overlay_outline_prepass_curves_vert.glsl | 2 +- .../shaders/overlay_paint_weight_frag.glsl | 2 +- .../overlay_volume_gridlines_vert.glsl | 10 +- .../shaders/overlay_volume_velocity_vert.glsl | 10 +- .../shaders/overlay_wireframe_vert.glsl | 2 +- .../shaders/workbench_volume_frag.glsl | 8 +- .../intern/shaders/common_debug_draw_lib.glsl | 18 +- .../shaders/common_debug_print_lib.glsl | 6 +- .../draw/intern/shaders/common_hair_lib.glsl | 4 +- .../draw/intern/shaders/common_math_lib.glsl | 2 +- .../intern/shaders/common_pointcloud_lib.glsl | 2 +- .../draw/intern/shaders/common_view_lib.glsl | 10 +- .../draw_debug_print_display_frag.glsl | 192 +- .../draw/intern/shaders/draw_model_lib.glsl | 8 +- .../draw/intern/shaders/draw_view_lib.glsl | 4 +- source/blender/gpu/GPU_shader.hh | 5 + .../gpu/glsl_preprocess/CMakeLists.txt | 2 + .../gpu/glsl_preprocess/glsl_preprocess.cc | 40 +- .../gpu/glsl_preprocess/glsl_preprocess.hh | 219 ++ source/blender/gpu/intern/gpu_shader.cc | 58 + .../gpu/intern/gpu_shader_dependency.cc | 10 +- .../blender/gpu/metal/mtl_shader_generator.hh | 2 - .../blender/gpu/metal/mtl_shader_generator.mm | 438 +-- source/blender/gpu/opengl/gl_backend.cc | 19 - source/blender/gpu/opengl/gl_context.hh | 1 - source/blender/gpu/opengl/gl_shader.cc | 4 - .../common/gpu_shader_math_matrix_lib.glsl | 46 +- .../gpu_shader_2D_widget_base_vert.glsl | 11 +- .../gpu_shader_2D_widget_shadow_vert.glsl | 74 +- .../gpu/shaders/gpu_shader_codegen_lib.glsl | 8 +- .../gpu/shaders/gpu_shader_text_frag.glsl | 4 +- .../gpu_shader_material_transform_utils.glsl | 20 +- 
.../gpu/shaders/metal/mtl_shader_defines.msl | 3024 +++++------------ .../shaders/opengl/glsl_shader_defines.glsl | 31 + .../gpu/tests/shaders/gpu_math_test.glsl | 14 +- source/blender/gpu/vulkan/vk_device.cc | 3 - source/blender/python/gpu/gpu_py_shader.cc | 2 +- 64 files changed, 1519 insertions(+), 2928 deletions(-) create mode 100644 source/blender/gpu/glsl_preprocess/glsl_preprocess.hh diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_deriche_gaussian_blur.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_deriche_gaussian_blur.glsl index e3414632a86..41f4ede125d 100644 --- a/source/blender/compositor/realtime_compositor/shaders/compositor_deriche_gaussian_blur.glsl +++ b/source/blender/compositor/realtime_compositor/shaders/compositor_deriche_gaussian_blur.glsl @@ -35,7 +35,7 @@ void main() * boundary condition, so we initialize all inputs by the boundary pixel. */ ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y); vec4 input_boundary = texture_load(input_tx, boundary_texel); - vec4 inputs[FILTER_ORDER + 1] = vec4[]( + vec4 inputs[FILTER_ORDER + 1] = float4_array( input_boundary, input_boundary, input_boundary, input_boundary, input_boundary); /* Create an array that holds the last FILTER_ORDER outputs along with the current output. The @@ -44,7 +44,7 @@ void main() * boundary coefficient. See the DericheGaussianCoefficients class for more information on the * boundary handing. */ vec4 output_boundary = input_boundary * boundary_coefficient; - vec4 outputs[FILTER_ORDER + 1] = vec4[]( + vec4 outputs[FILTER_ORDER + 1] = float4_array( output_boundary, output_boundary, output_boundary, output_boundary, output_boundary); for (int x = 0; x < width; x++) { diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_kuwahara_classic.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_kuwahara_classic.glsl index 6390a5ad7bf..c5ff051999f 100644 --- a/source/blender/compositor/realtime_compositor/shaders/compositor_kuwahara_classic.glsl +++ b/source/blender/compositor/realtime_compositor/shaders/compositor_kuwahara_classic.glsl @@ -16,8 +16,9 @@ void main() int radius = max(0, size); #endif - vec4 mean_of_squared_color_of_quadrants[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); - vec4 mean_of_color_of_quadrants[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + vec4 mean_of_squared_color_of_quadrants[4] = float4_array( + vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + vec4 mean_of_color_of_quadrants[4] = float4_array(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); /* Compute the above statistics for each of the quadrants around the current pixel. 
*/ for (int q = 0; q < 4; q++) { diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform.glsl index 2608fbf230b..699836d4f44 100644 --- a/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform.glsl +++ b/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform.glsl @@ -11,7 +11,7 @@ void main() vec2 coordinates = (vec2(texel) + vec2(0.5)) / output_size; - vec3 transformed_coordinates = mat3(homography_matrix) * vec3(coordinates, 1.0); + vec3 transformed_coordinates = to_float3x3(homography_matrix) * vec3(coordinates, 1.0); vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z; /* The derivatives of the projected coordinates with respect to x and y are the first and diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_mask.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_mask.glsl index 4762ab8c3c3..d463033417a 100644 --- a/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_mask.glsl +++ b/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_mask.glsl @@ -8,7 +8,7 @@ void main() vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(mask_img)); - vec3 transformed_coordinates = mat3(homography_matrix) * vec3(coordinates, 1.0); + vec3 transformed_coordinates = to_float3x3(homography_matrix) * vec3(coordinates, 1.0); vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z; bool is_inside_plane = all(greaterThanEqual(projected_coordinates, vec2(0.0))) && diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_motion_blur.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_motion_blur.glsl index 7d5a2a30906..631de31a5d8 100644 --- a/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_motion_blur.glsl +++ b/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_motion_blur.glsl @@ -13,7 +13,7 @@ void main() vec4 accumulated_color = vec4(0.0); for (int i = 0; i < number_of_motion_blur_samples; i++) { - mat3 homography_matrix = mat3(homography_matrices[i]); + mat3 homography_matrix = to_float3x3(homography_matrices[i]); vec3 transformed_coordinates = homography_matrix * vec3(coordinates, 1.0); vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z; diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_motion_blur_mask.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_motion_blur_mask.glsl index 9f41bf6aa88..149a535553f 100644 --- a/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_motion_blur_mask.glsl +++ b/source/blender/compositor/realtime_compositor/shaders/compositor_plane_deform_motion_blur_mask.glsl @@ -10,7 +10,7 @@ void main() float accumulated_mask = 0.0; for (int i = 0; i < number_of_motion_blur_samples; i++) { - mat3 homography_matrix = mat3(homography_matrices[i]); + mat3 homography_matrix = to_float3x3(homography_matrices[i]); vec3 transformed_coordinates = homography_matrix * vec3(coordinates, 1.0); vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z; diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_realize_on_domain.glsl 
b/source/blender/compositor/realtime_compositor/shaders/compositor_realize_on_domain.glsl index f05a2af3909..e0a4085b073 100644 --- a/source/blender/compositor/realtime_compositor/shaders/compositor_realize_on_domain.glsl +++ b/source/blender/compositor/realtime_compositor/shaders/compositor_realize_on_domain.glsl @@ -15,7 +15,7 @@ void main() /* Transform the input image by transforming the domain coordinates with the inverse of input * image's transformation. The inverse transformation is an affine matrix and thus the * coordinates should be in homogeneous coordinates. */ - coordinates = (mat3(inverse_transformation) * vec3(coordinates, 1.0)).xy; + coordinates = (to_float3x3(inverse_transformation) * vec3(coordinates, 1.0)).xy; /* Subtract the offset and divide by the input image size to get the relevant coordinates into * the sampler's expected [0, 1] range. */ diff --git a/source/blender/compositor/realtime_compositor/shaders/compositor_van_vliet_gaussian_blur.glsl b/source/blender/compositor/realtime_compositor/shaders/compositor_van_vliet_gaussian_blur.glsl index cce92d0fe02..0716c20d029 100644 --- a/source/blender/compositor/realtime_compositor/shaders/compositor_van_vliet_gaussian_blur.glsl +++ b/source/blender/compositor/realtime_compositor/shaders/compositor_van_vliet_gaussian_blur.glsl @@ -56,7 +56,7 @@ void main() * boundary condition, so we initialize all inputs by the boundary pixel. */ ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y); vec4 input_boundary = texture_load(input_tx, boundary_texel); - vec4 inputs[FILTER_ORDER + 1] = vec4[](input_boundary, input_boundary, input_boundary); + vec4 inputs[FILTER_ORDER + 1] = float4_array(input_boundary, input_boundary, input_boundary); /* Create an array that holds the last FILTER_ORDER outputs along with the current output. The * current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann @@ -64,7 +64,7 @@ void main() * boundary coefficient. See the VanVlietGaussianCoefficients class for more information on the * boundary handing. */ vec4 output_boundary = input_boundary * boundary_coefficient; - vec4 outputs[FILTER_ORDER + 1] = vec4[](output_boundary, output_boundary, output_boundary); + vec4 outputs[FILTER_ORDER + 1] = float4_array(output_boundary, output_boundary, output_boundary); for (int x = 0; x < width; x++) { /* Run forward across rows for the causal filter and backward for the non causal filter. 
*/ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl index 18d0a8d4c3d..35e537c34c6 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_lib.glsl @@ -57,8 +57,8 @@ const float dof_layer_offset = 0.5 + 0.5; const int dof_max_slight_focus_radius = DOF_MAX_SLIGHT_FOCUS_RADIUS; -const uvec2 quad_offsets_u[4] = uvec2[4](uvec2(0, 1), uvec2(1, 1), uvec2(1, 0), uvec2(0, 0)); -const vec2 quad_offsets[4] = vec2[4]( +const uvec2 quad_offsets_u[4] = uint2_array(uvec2(0, 1), uvec2(1, 1), uvec2(1, 0), uvec2(0, 0)); +const vec2 quad_offsets[4] = float2_array( vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5)); /** \} */ diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl index 2880c749b36..7ca3bfa557b 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_resolve_comp.glsl @@ -80,7 +80,7 @@ vec3 dof_neighborhood_clamp(vec2 frag_coord, vec3 color, float center_coc, float { /* Stabilize color by clamping with the stable half res neighborhood. */ vec3 neighbor_min, neighbor_max; - const vec2 corners[4] = vec2[4](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1)); + const vec2 corners[4] = float2_array(vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1)); for (int i = 0; i < 4; i++) { /** * Visit the 4 half-res texels around (and containing) the full-resolution texel. diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl index aaa8be4c8c8..5ee1665a73f 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_depth_of_field_stabilize_comp.glsl @@ -125,7 +125,7 @@ float dof_bilateral_weight(float reference_coc, float sample_coc) DofSample dof_spatial_filtering() { /* Plus (+) shape offsets. */ - const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1)); + const ivec2 plus_offsets[4] = int2_array(ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1)); DofSample center = dof_fetch_input_sample(ivec2(0)); DofSample accum = DofSample(vec4(0.0), 0.0); float accum_weight = 0.0; @@ -165,7 +165,7 @@ struct DofNeighborhoodMinMax { DofNeighborhoodMinMax dof_neighbor_boundbox() { /* Plus (+) shape offsets. */ - const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1)); + const ivec2 plus_offsets[4] = int2_array(ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1)); /** * Simple bounding box calculation in YCoCg as described in: * "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014 @@ -183,7 +183,7 @@ DofNeighborhoodMinMax dof_neighbor_boundbox() * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. 
*/ DofSample min_c_3x3 = min_c; DofSample max_c_3x3 = max_c; - const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1)); + const ivec2 corners[4] = int2_array(ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1)); for (int i = 0; i < 4; i++) { DofSample samp = dof_fetch_input_sample(corners[i]); min_c_3x3.color = min(min_c_3x3.color, samp.color); @@ -206,7 +206,7 @@ vec2 dof_pixel_history_motion_vector(ivec2 texel_sample) * Dilate velocity by using the nearest pixel in a cross pattern. * "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014 (Slide 27) */ - const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2)); + const ivec2 corners[4] = int2_array(ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2)); float min_depth = dof_fetch_half_depth(ivec2(0)); ivec2 nearest_texel = ivec2(0); for (int i = 0; i < 4; i++) { diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_planar_vert.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_planar_vert.glsl index c3df0eb2d1a..c56ab377e4c 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_planar_vert.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_planar_vert.glsl @@ -9,13 +9,13 @@ void main() { /* Constant array moved inside function scope. * Minimizes local register allocation in MSL. */ - const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0), - vec2(1.0, -1.0), - vec2(-1.0, 1.0), + const vec2 pos[6] = float2_array(vec2(-1.0, -1.0), + vec2(1.0, -1.0), + vec2(-1.0, 1.0), - vec2(1.0, -1.0), - vec2(1.0, 1.0), - vec2(-1.0, 1.0)); + vec2(1.0, -1.0), + vec2(1.0, 1.0), + vec2(-1.0, 1.0)); vec2 lP = pos[gl_VertexID % 6]; int display_index = gl_VertexID / 6; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_sphere_vert.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_sphere_vert.glsl index 9214f98d390..91a0d544396 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_sphere_vert.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_sphere_vert.glsl @@ -9,13 +9,13 @@ void main() { /* Constant array moved inside function scope. * Minimizes local register allocation in MSL. */ - const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0), - vec2(1.0, -1.0), - vec2(-1.0, 1.0), + const vec2 pos[6] = float2_array(vec2(-1.0, -1.0), + vec2(1.0, -1.0), + vec2(-1.0, 1.0), - vec2(1.0, -1.0), - vec2(1.0, 1.0), - vec2(-1.0, 1.0)); + vec2(1.0, -1.0), + vec2(1.0, 1.0), + vec2(-1.0, 1.0)); lP = pos[gl_VertexID % 6]; int display_index = gl_VertexID / 6; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_volume_vert.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_volume_vert.glsl index 53625e9d73c..f3eb95f255c 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_volume_vert.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_display_lightprobe_volume_vert.glsl @@ -9,13 +9,13 @@ void main() { /* Constant array moved inside function scope. * Minimizes local register allocation in MSL. 
*/ - const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0), - vec2(1.0, -1.0), - vec2(-1.0, 1.0), + const vec2 pos[6] = float2_array(vec2(-1.0, -1.0), + vec2(1.0, -1.0), + vec2(-1.0, 1.0), - vec2(1.0, -1.0), - vec2(1.0, 1.0), - vec2(-1.0, 1.0)); + vec2(1.0, -1.0), + vec2(1.0, 1.0), + vec2(-1.0, 1.0)); lP = pos[gl_VertexID % 6]; int cell_index = gl_VertexID / 6; diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl index 1e7b840eb0e..b5e91d67e7a 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_film_lib.glsl @@ -195,7 +195,7 @@ void film_cryptomatte_layer_accum_and_store( return; } /* x = hash, y = accumulated weight. Only keep track of 4 highest weighted samples. */ - vec2 crypto_samples[4] = vec2[4](vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0)); + vec2 crypto_samples[4] = float2_array(vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0)); for (int i = 0; i < samples_len; i++) { FilmSample src = film_sample_get(i, texel_film); film_sample_cryptomatte_accum(src, layer_component, cryptomatte_tx, crypto_samples); @@ -246,7 +246,7 @@ vec2 film_pixel_history_motion_vector(ivec2 texel_sample) * Dilate velocity by using the nearest pixel in a cross pattern. * "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014 (Slide 27) */ - const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2)); + const ivec2 corners[4] = int2_array(ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2)); float min_depth = texelFetch(depth_tx, texel_sample, 0).x; ivec2 nearest_texel = texel_sample; for (int i = 0; i < 4; i++) { @@ -342,11 +342,11 @@ vec4 film_sample_catmull_rom(sampler2D color_tx, vec2 input_texel) void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c) { /* Plus (+) shape offsets. */ - const ivec2 plus_offsets[5] = ivec2[5](ivec2(0, 0), /* Center */ - ivec2(-1, 0), - ivec2(0, -1), - ivec2(1, 0), - ivec2(0, 1)); + const ivec2 plus_offsets[5] = int2_array(ivec2(0, 0), /* Center */ + ivec2(-1, 0), + ivec2(0, -1), + ivec2(1, 0), + ivec2(0, 1)); #if 0 /** * Compute Variance of neighborhood as described in: @@ -389,7 +389,7 @@ void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */ vec4 min_c_3x3 = min_c; vec4 max_c_3x3 = max_c; - const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1)); + const ivec2 corners[4] = int2_array(ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1)); for (int i = 0; i < 4; i++) { vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + corners[i]); min_c_3x3 = min(min_c_3x3, color); diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_light_shadow_setup_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_light_shadow_setup_comp.glsl index e5de0fb1cd5..0b7a0370699 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_light_shadow_setup_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_light_shadow_setup_comp.glsl @@ -220,22 +220,22 @@ void cubeface_sync(int tilemap_id, /* Use switch instead of inline array of float3x3. 
*/ switch (cubeface) { case Z_NEG: - viewmat = mat4x4(mat3x3(+1, +0, +0, +0, +1, +0, +0, +0, +1)) * viewmat; + viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, +1, +0, +0, +0, +1)) * viewmat; break; case X_POS: - viewmat = mat4x4(mat3x3(+0, +0, -1, -1, +0, +0, +0, +1, +0)) * viewmat; + viewmat = to_float4x4(mat3x3(+0, +0, -1, -1, +0, +0, +0, +1, +0)) * viewmat; break; case X_NEG: - viewmat = mat4x4(mat3x3(+0, +0, +1, +1, +0, +0, +0, +1, +0)) * viewmat; + viewmat = to_float4x4(mat3x3(+0, +0, +1, +1, +0, +0, +0, +1, +0)) * viewmat; break; case Y_POS: - viewmat = mat4x4(mat3x3(+1, +0, +0, +0, +0, -1, +0, +1, +0)) * viewmat; + viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, +0, -1, +0, +1, +0)) * viewmat; break; case Y_NEG: - viewmat = mat4x4(mat3x3(-1, +0, +0, +0, +0, +1, +0, +1, +0)) * viewmat; + viewmat = to_float4x4(mat3x3(-1, +0, +0, +0, +0, +1, +0, +1, +0)) * viewmat; break; case Z_POS: - viewmat = mat4x4(mat3x3(+1, +0, +0, +0, -1, +0, +0, +0, -1)) * viewmat; + viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, -1, +0, +0, +0, -1)) * viewmat; break; } @@ -289,7 +289,7 @@ void main() * set to 0 only when the light radius is also 0 to detect this case. */ } else { - light.object_to_world = transform_from_matrix(mat4x4(from_up_axis(shadow_direction))); + light.object_to_world = transform_from_matrix(to_float4x4(from_up_axis(shadow_direction))); } } diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_eval_lib.glsl index 4773db056e4..4db3747da01 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_eval_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_eval_lib.glsl @@ -49,7 +49,7 @@ vec3 lightprobe_sphere_parallax(SphereProbeData probe, vec3 P, vec3 L) } /* Correct reflection ray using parallax volume intersection. */ vec3 lP = vec4(P, 1.0) * probe.world_to_probe_transposed; - vec3 lL = (mat3x3(probe.world_to_probe_transposed) * L) / probe.parallax_distance; + vec3 lL = (to_float3x3(probe.world_to_probe_transposed) * L) / probe.parallax_distance; float dist = (probe.parallax_shape == SHAPE_ELIPSOID) ? line_unit_sphere_intersect_dist(lP, lL) : line_unit_box_intersect_dist(lP, lL); diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_eval_lib.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_eval_lib.glsl index 85a6a003f3f..d76b19e5a5a 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_eval_lib.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_eval_lib.glsl @@ -170,7 +170,7 @@ SphericalHarmonicL1 lightprobe_volume_sample( VolumeProbeData grid_data = grids_infos_buf[index]; - mat3x3 world_to_grid_transposed = mat3x3(grid_data.world_to_grid_transposed); + mat3x3 world_to_grid_transposed = to_float3x3(grid_data.world_to_grid_transposed); vec3 lNg = safe_normalize(Ng * world_to_grid_transposed); vec3 lV = safe_normalize(V * world_to_grid_transposed); diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_load_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_load_comp.glsl index 2a281406a95..ad8760cb12d 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_load_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_lightprobe_volume_load_comp.glsl @@ -104,7 +104,8 @@ void main() } /* Rotate Spherical Harmonic into world space. 
*/ - mat3 grid_to_world_rot = normalize(mat3(grids_infos_buf[grid_index].world_to_grid_transposed)); + mat3 grid_to_world_rot = normalize( + to_float3x3(grids_infos_buf[grid_index].world_to_grid_transposed)); sh_local = spherical_harmonics_rotate(grid_to_world_rot, sh_local); SphericalHarmonicL1 sh_visibility; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl index 4516f216f60..a07dd1a819e 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_envelope_solid_vert.glsl @@ -36,7 +36,7 @@ void main() sp = bone_mat * sp.xzy + headSphere.xyz; nor = bone_mat * nor.xzy; - normalView = mat3(drw_view.viewmat) * nor; + normalView = to_float3x3(drw_view.viewmat) * nor; finalStateColor = stateColor; finalBoneColor = boneColor; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl index 999f8a01fea..850b7de837e 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert.glsl @@ -34,7 +34,7 @@ VertOut vertex_main(VertIn v_in) /* This is slow and run per vertex, but it's still faster than * doing it per instance on CPU and sending it on via instance attribute. */ - mat3 normal_mat = transpose(inverse(mat3(model_mat))); + mat3 normal_mat = transpose(inverse(to_float3x3(model_mat))); VertOut v_out; v_out.vPos = view_pos.xyz; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert_no_geom.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert_no_geom.glsl index 859b6110795..1f1fd452b0d 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert_no_geom.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_outline_vert_no_geom.glsl @@ -39,7 +39,7 @@ void do_vertex_shader(mat4 in_inst_obmat, /* This is slow and run per vertex, but it's still faster than * doing it per instance on CPU and sending it on via instance attribute. */ - mat3 normal_mat = transpose(inverse(mat3(model_mat))); + mat3 normal_mat = transpose(inverse(to_float3x3(model_mat))); out_ssPos = proj(out_pPos); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl index 4c2ae74279c..3f2fcb9fe02 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_shape_solid_vert.glsl @@ -15,7 +15,7 @@ void main() /* This is slow and run per vertex, but it's still faster than * doing it per instance on CPU and sending it on via instance attribute. 
*/ - mat3 normal_mat = transpose(inverse(mat3(model_mat))); + mat3 normal_mat = transpose(inverse(to_float3x3(model_mat))); vec3 normal = normalize(normal_world_to_view(normal_mat * nor)); inverted = int(dot(cross(model_mat[0].xyz, model_mat[1].xyz), model_mat[2].xyz) < 0.0); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl index c7d79074a42..c29c600b594 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_outline_vert.glsl @@ -27,7 +27,7 @@ void main() * In perspective mode it's also the view-space position * of the sphere center. */ vec3 cam_ray = (is_persp) ? model_view_matrix[3].xyz : vec3(0.0, 0.0, -1.0); - cam_ray = mat3(sphereMatrix) * cam_ray; + cam_ray = to_float3x3(sphereMatrix) * cam_ray; /* Sphere center distance from the camera (persp) in local space. */ float cam_dist = length(cam_ray); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl index 2b4983ca015..d94121f684a 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_armature_sphere_solid_vert.glsl @@ -25,7 +25,7 @@ void main() * In perspective mode it's also the view-space position * of the sphere center. */ vec3 cam_ray = (is_persp) ? model_view_matrix[3].xyz : vec3(0.0, 0.0, -1.0); - cam_ray = mat3(sphereMatrix) * cam_ray; + cam_ray = to_float3x3(sphereMatrix) * cam_ray; /* Sphere center distance from the camera (persp) in local space. */ float cam_dist = length(cam_ray); diff --git a/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl index 02a505475f2..79441c26cd9 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_background_frag.glsl @@ -11,10 +11,10 @@ float dither(void) { /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader * memory pressure. 
*/ - const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)), - vec4(P(12.0), P(4.0), P(14.0), P(6.0)), - vec4(P(3.0), P(11.0), P(1.0), P(9.0)), - vec4(P(15.0), P(7.0), P(13.0), P(5.0))); + const vec4 dither_mat4x4[4] = float4_array(vec4(P(0.0), P(8.0), P(2.0), P(10.0)), + vec4(P(12.0), P(4.0), P(14.0), P(6.0)), + vec4(P(3.0), P(11.0), P(1.0), P(9.0)), + vec4(P(15.0), P(7.0), P(13.0), P(5.0))); ivec2 co = ivec2(gl_FragCoord.xy) % 4; return dither_mat4x4[co.x][co.y]; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl index 23d90a3160e..f844af0559a 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_edit_mesh_skin_root_vert.glsl @@ -8,7 +8,7 @@ void main() { - mat3 imat = mat3(ModelMatrixInverse); + mat3 imat = to_float3x3(ModelMatrixInverse); vec3 right = normalize(imat * ViewMatrixInverse[0].xyz); vec3 up = normalize(imat * ViewMatrixInverse[1].xyz); #ifdef VERTEX_PULL diff --git a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl index d74553ce5af..897347b7a40 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_outline_prepass_curves_vert.glsl @@ -61,7 +61,7 @@ void main() thick_time = thickness * (thick_time * 2.0 - 1.0); /* Take object scale into account. * NOTE: This only works fine with uniform scaling. */ - float scale = 1.0 / length(mat3(ModelMatrixInverse) * binor); + float scale = 1.0 / length(to_float3x3(ModelMatrixInverse) * binor); world_pos = center_wpos + binor * thick_time * scale; } else { diff --git a/source/blender/draw/engines/overlay/shaders/overlay_paint_weight_frag.glsl b/source/blender/draw/engines/overlay/shaders/overlay_paint_weight_frag.glsl index df4953f5571..581a71a03e5 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_paint_weight_frag.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_paint_weight_frag.glsl @@ -80,7 +80,7 @@ void main() /* Weights are available */ else { float weight = weight_interp.x; - vec4 weight_color = texture(colorramp, weight, 0); + vec4 weight_color = texture(colorramp, weight); weight_color = apply_color_fac(weight_color); /* Contour display */ diff --git a/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl index f1150a8ffe7..eaa84ab0836 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_volume_gridlines_vert.glsl @@ -87,14 +87,14 @@ void main() #endif /* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader * memory pressure. */ - const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0); + const int indices[8] = int_array(0, 1, 1, 2, 2, 3, 3, 0); /* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid * overlapping of the outlines. 
*/ - const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0), - vec3(0.45, 0.45, 0.0), - vec3(0.45, -0.45, 0.0), - vec3(-0.45, -0.45, 0.0)); + const vec3 corners[4] = float3_array(vec3(-0.45, 0.45, 0.0), + vec3(0.45, 0.45, 0.0), + vec3(0.45, -0.45, 0.0), + vec3(-0.45, -0.45, 0.0)); vec3 pos = domainOriginOffset + cellSize * (vec3(cell_co + adaptiveCellOffset) + cell_offset); vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 8]]; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl index 52600063a36..095fc916eba 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_volume_velocity_vert.glsl @@ -178,12 +178,12 @@ void main() # ifdef USE_NEEDLE /* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader * memory pressure. */ - const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5), - vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5), - vec3(0.2 * 0.866, -0.2 * 0.5, -0.5), - vec3(0.0, 0.0, 0.5)); + const vec3 corners[4] = float3_array(vec3(0.0, 0.2, -0.5), + vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5), + vec3(0.2 * 0.866, -0.2 * 0.5, -0.5), + vec3(0.0, 0.0, 0.5)); - const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3); + const int indices[12] = int_array(0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3); vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 12]]; pos += rotated_pos * vector_length * displaySize * cellSize; diff --git a/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl b/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl index 9124c456303..799213d9657 100644 --- a/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl +++ b/source/blender/draw/engines/overlay/shaders/overlay_wireframe_vert.glsl @@ -100,7 +100,7 @@ void main() if (isHair) { mat4 obmat = hairDupliMatrix; wpos = (obmat * vec4(pos, 1.0)).xyz; - wnor = -normalize(mat3(obmat) * nor); + wnor = -normalize(to_float3x3(obmat) * nor); } bool is_persp = (drw_view.winmat[3][3] == 0.0); diff --git a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl index 0bb8741396d..5c4528880cc 100644 --- a/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl +++ b/source/blender/draw/engines/workbench/shaders/workbench_volume_frag.glsl @@ -190,10 +190,10 @@ vec4 volume_integration(vec3 ray_ori, vec3 ray_dir, float ray_inc, float ray_max { /* NOTE: Constant array declared inside function scope to reduce shader core thread memory * pressure on Apple Silicon. */ - const vec4 dither_mat[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)), - vec4(P(12.0), P(4.0), P(14.0), P(6.0)), - vec4(P(3.0), P(11.0), P(1.0), P(9.0)), - vec4(P(15.0), P(7.0), P(13.0), P(5.0))); + const vec4 dither_mat[4] = float4_array(vec4(P(0.0), P(8.0), P(2.0), P(10.0)), + vec4(P(12.0), P(4.0), P(14.0), P(6.0)), + vec4(P(3.0), P(11.0), P(1.0), P(9.0)), + vec4(P(15.0), P(7.0), P(13.0), P(5.0))); /* Start with full transmittance and no scattered light. 
*/ vec3 final_scattering = vec3(0.0); float final_transmittance = 1.0; diff --git a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl index d740bd2c308..42991490568 100644 --- a/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl +++ b/source/blender/draw/intern/shaders/common_debug_draw_lib.glsl @@ -176,7 +176,7 @@ void drw_debug_sphere(vec3 p, float radius) */ void drw_debug_matrix(mat4 mat, vec4 v_color) { - vec4 p[4] = vec4[4](vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1)); + vec4 p[4] = float4_array(vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1)); for (int i = 0; i < 4; i++) { p[i] = mat * p[i]; p[i].xyz /= p[i].w; @@ -195,14 +195,14 @@ void drw_debug_matrix(mat4 mat) */ void drw_debug_matrix_as_bbox(mat4 mat, vec4 v_color) { - vec4 p[8] = vec4[8](vec4(-1, -1, -1, 1), - vec4(1, -1, -1, 1), - vec4(1, 1, -1, 1), - vec4(-1, 1, -1, 1), - vec4(-1, -1, 1, 1), - vec4(1, -1, 1, 1), - vec4(1, 1, 1, 1), - vec4(-1, 1, 1, 1)); + vec4 p[8] = float4_array(vec4(-1, -1, -1, 1), + vec4(1, -1, -1, 1), + vec4(1, 1, -1, 1), + vec4(-1, 1, -1, 1), + vec4(-1, -1, 1, 1), + vec4(1, -1, 1, 1), + vec4(1, 1, 1, 1), + vec4(-1, 1, 1, 1)); for (int i = 0; i < 8; i++) { p[i] = mat * p[i]; p[i].xyz /= p[i].w; diff --git a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl index 0cd66ceedc2..72d868fddf4 100644 --- a/source/blender/draw/intern/shaders/common_debug_print_lib.glsl +++ b/source/blender/draw/intern/shaders/common_debug_print_lib.glsl @@ -124,7 +124,7 @@ void drw_print_value_binary(uint value) { drw_print_no_endl("0b"); drw_print_string_start(10u * 4u); - uint digits[10] = uint[10](0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u); + uint digits[10] = uint_array(0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u); uint digit = 0u; for (uint i = 0u; i < 32u; i++) { drw_print_append_digit(((value >> i) & 1u), digits[digit / 4u]); @@ -155,7 +155,7 @@ void drw_print_value_uint(uint value, const bool hex, bool is_negative, const bo drw_print_string_start(3u * 4u); const uint blank_value = hex ? 0x30303030u : 0x20202020u; const uint prefix = hex ? 0x78302020u : 0x20202020u; - uint digits[3] = uint[3](blank_value, blank_value, prefix); + uint digits[3] = uint_array(blank_value, blank_value, prefix); const uint base = hex ? 16u : 10u; uint digit = 0u; /* Add `u` suffix. */ @@ -247,7 +247,7 @@ void drw_print_value(float val) * and exponent (4). */ const float significant_digits = 6.0; drw_print_string_start(3u * 4u); - uint digits[3] = uint[3](0x20202020u, 0x20202020u, 0x20202020u); + uint digits[3] = uint_array(0x20202020u, 0x20202020u, 0x20202020u); float exponent = floor(log(abs(val)) / log(10.0)); bool display_exponent = exponent >= (significant_digits) || diff --git a/source/blender/draw/intern/shaders/common_hair_lib.glsl b/source/blender/draw/intern/shaders/common_hair_lib.glsl index febdde6e9bd..679ff9066bf 100644 --- a/source/blender/draw/intern/shaders/common_hair_lib.glsl +++ b/source/blender/draw/intern/shaders/common_hair_lib.glsl @@ -210,7 +210,7 @@ void hair_get_center_pos_tan_binor_time(bool is_persp, mat4 obmat = hairDupliMatrix; wpos = (obmat * vec4(wpos, 1.0)).xyz; - wtan = -normalize(mat3(obmat) * wtan); + wtan = -normalize(to_float3x3(obmat) * wtan); vec3 camera_vec = (is_persp) ? 
camera_pos - wpos : camera_z; wbinor = normalize(cross(camera_vec, wtan)); @@ -236,7 +236,7 @@ void hair_get_pos_tan_binor_time(bool is_persp, thick_time = thickness * (thick_time * 2.0 - 1.0); /* Take object scale into account. * NOTE: This only works fine with uniform scaling. */ - float scale = 1.0 / length(mat3(invmodel_mat) * wbinor); + float scale = 1.0 / length(to_float3x3(invmodel_mat) * wbinor); wpos += wbinor * thick_time * scale; } else { diff --git a/source/blender/draw/intern/shaders/common_math_lib.glsl b/source/blender/draw/intern/shaders/common_math_lib.glsl index 59936d5ccfc..7fe6437d29a 100644 --- a/source/blender/draw/intern/shaders/common_math_lib.glsl +++ b/source/blender/draw/intern/shaders/common_math_lib.glsl @@ -41,7 +41,7 @@ mat3 mul(mat3 m1, mat3 m2) # ifndef GPU_SHADER_MATH_MATRIX_LIB_GLSL vec3 transform_direction(mat4 m, vec3 v) { - return mat3(m) * v; + return to_float3x3(m) * v; } vec3 transform_point(mat4 m, vec3 v) { diff --git a/source/blender/draw/intern/shaders/common_pointcloud_lib.glsl b/source/blender/draw/intern/shaders/common_pointcloud_lib.glsl index 10586e02876..ffb421c0131 100644 --- a/source/blender/draw/intern/shaders/common_pointcloud_lib.glsl +++ b/source/blender/draw/intern/shaders/common_pointcloud_lib.glsl @@ -37,7 +37,7 @@ void pointcloud_get_pos_and_radius(out vec3 outpos, out float outradius) int id = pointcloud_get_point_id(); vec4 pos_rad = texelFetch(ptcloud_pos_rad_tx, id); outpos = point_object_to_world(pos_rad.xyz); - outradius = dot(abs(mat3(ModelMatrix) * pos_rad.www), vec3(1.0 / 3.0)); + outradius = dot(abs(to_float3x3(ModelMatrix) * pos_rad.www), vec3(1.0 / 3.0)); } /* Return world position and normal. */ diff --git a/source/blender/draw/intern/shaders/common_view_lib.glsl b/source/blender/draw/intern/shaders/common_view_lib.glsl index 068f61694ab..4fde0eea3f2 100644 --- a/source/blender/draw/intern/shaders/common_view_lib.glsl +++ b/source/blender/draw/intern/shaders/common_view_lib.glsl @@ -227,14 +227,14 @@ uniform mat4 ModelMatrixInverse; * NOTE: This is only valid because we are only using the mat3 of the ViewMatrixInverse. * ViewMatrix * transpose(ModelMatrixInverse) */ -# define NormalMatrix transpose(mat3(ModelMatrixInverse)) -# define NormalMatrixInverse transpose(mat3(ModelMatrix)) +# define NormalMatrix transpose(to_float3x3(ModelMatrixInverse)) +# define NormalMatrixInverse transpose(to_float3x3(ModelMatrix)) -# define normal_object_to_view(n) (mat3(ViewMatrix) * (NormalMatrix * n)) +# define normal_object_to_view(n) (to_float3x3(ViewMatrix) * (NormalMatrix * n)) # define normal_object_to_world(n) (NormalMatrix * n) # define normal_world_to_object(n) (NormalMatrixInverse * n) -# define normal_world_to_view(n) (mat3(ViewMatrix) * n) -# define normal_view_to_world(n) (mat3(ViewMatrixInverse) * n) +# define normal_world_to_view(n) (to_float3x3(ViewMatrix) * n) +# define normal_view_to_world(n) (to_float3x3(ViewMatrixInverse) * n) # define point_object_to_ndc(p) \ (ProjectionMatrix * (ViewMatrix * vec4((ModelMatrix * vec4(p, 1.0)).xyz, 1.0))) diff --git a/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl b/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl index 518618aaa40..e70e2c9f873 100644 --- a/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl +++ b/source/blender/draw/intern/shaders/draw_debug_print_display_frag.glsl @@ -9,102 +9,102 @@ bool char_intersect(uvec2 bitmap_position) { /* Using 8x8 = 64bits = uvec2. 
*/ - uvec2 ascii_bitmap[96] = uvec2[96](uvec2(0x00000000u, 0x00000000u), - uvec2(0x18001800u, 0x183c3c18u), - uvec2(0x00000000u, 0x36360000u), - uvec2(0x7f363600u, 0x36367f36u), - uvec2(0x301f0c00u, 0x0c3e031eu), - uvec2(0x0c666300u, 0x00633318u), - uvec2(0x3b336e00u, 0x1c361c6eu), - uvec2(0x00000000u, 0x06060300u), - uvec2(0x060c1800u, 0x180c0606u), - uvec2(0x180c0600u, 0x060c1818u), - uvec2(0x3c660000u, 0x00663cffu), - uvec2(0x0c0c0000u, 0x000c0c3fu), - uvec2(0x000c0c06u, 0x00000000u), - uvec2(0x00000000u, 0x0000003fu), - uvec2(0x000c0c00u, 0x00000000u), - uvec2(0x06030100u, 0x6030180cu), - uvec2(0x6f673e00u, 0x3e63737bu), - uvec2(0x0c0c3f00u, 0x0c0e0c0cu), - uvec2(0x06333f00u, 0x1e33301cu), - uvec2(0x30331e00u, 0x1e33301cu), - uvec2(0x7f307800u, 0x383c3633u), - uvec2(0x30331e00u, 0x3f031f30u), - uvec2(0x33331e00u, 0x1c06031fu), - uvec2(0x0c0c0c00u, 0x3f333018u), - uvec2(0x33331e00u, 0x1e33331eu), - uvec2(0x30180e00u, 0x1e33333eu), - uvec2(0x000c0c00u, 0x000c0c00u), - uvec2(0x000c0c06u, 0x000c0c00u), - uvec2(0x060c1800u, 0x180c0603u), - uvec2(0x003f0000u, 0x00003f00u), - uvec2(0x180c0600u, 0x060c1830u), - uvec2(0x0c000c00u, 0x1e333018u), - uvec2(0x7b031e00u, 0x3e637b7bu), - uvec2(0x3f333300u, 0x0c1e3333u), - uvec2(0x66663f00u, 0x3f66663eu), - uvec2(0x03663c00u, 0x3c660303u), - uvec2(0x66361f00u, 0x1f366666u), - uvec2(0x16467f00u, 0x7f46161eu), - uvec2(0x16060f00u, 0x7f46161eu), - uvec2(0x73667c00u, 0x3c660303u), - uvec2(0x33333300u, 0x3333333fu), - uvec2(0x0c0c1e00u, 0x1e0c0c0cu), - uvec2(0x33331e00u, 0x78303030u), - uvec2(0x36666700u, 0x6766361eu), - uvec2(0x46667f00u, 0x0f060606u), - uvec2(0x6b636300u, 0x63777f7fu), - uvec2(0x73636300u, 0x63676f7bu), - uvec2(0x63361c00u, 0x1c366363u), - uvec2(0x06060f00u, 0x3f66663eu), - uvec2(0x3b1e3800u, 0x1e333333u), - uvec2(0x36666700u, 0x3f66663eu), - uvec2(0x38331e00u, 0x1e33070eu), - uvec2(0x0c0c1e00u, 0x3f2d0c0cu), - uvec2(0x33333f00u, 0x33333333u), - uvec2(0x331e0c00u, 0x33333333u), - uvec2(0x7f776300u, 0x6363636bu), - uvec2(0x1c366300u, 0x6363361cu), - uvec2(0x0c0c1e00u, 0x3333331eu), - uvec2(0x4c667f00u, 0x7f633118u), - uvec2(0x06061e00u, 0x1e060606u), - uvec2(0x30604000u, 0x03060c18u), - uvec2(0x18181e00u, 0x1e181818u), - uvec2(0x00000000u, 0x081c3663u), - uvec2(0x000000ffu, 0x00000000u), - uvec2(0x00000000u, 0x0c0c1800u), - uvec2(0x3e336e00u, 0x00001e30u), - uvec2(0x66663b00u, 0x0706063eu), - uvec2(0x03331e00u, 0x00001e33u), - uvec2(0x33336e00u, 0x3830303eu), - uvec2(0x3f031e00u, 0x00001e33u), - uvec2(0x06060f00u, 0x1c36060fu), - uvec2(0x333e301fu, 0x00006e33u), - uvec2(0x66666700u, 0x0706366eu), - uvec2(0x0c0c1e00u, 0x0c000e0cu), - uvec2(0x3033331eu, 0x30003030u), - uvec2(0x1e366700u, 0x07066636u), - uvec2(0x0c0c1e00u, 0x0e0c0c0cu), - uvec2(0x7f6b6300u, 0x0000337fu), - uvec2(0x33333300u, 0x00001f33u), - uvec2(0x33331e00u, 0x00001e33u), - uvec2(0x663e060fu, 0x00003b66u), - uvec2(0x333e3078u, 0x00006e33u), - uvec2(0x66060f00u, 0x00003b6eu), - uvec2(0x1e301f00u, 0x00003e03u), - uvec2(0x0c2c1800u, 0x080c3e0cu), - uvec2(0x33336e00u, 0x00003333u), - uvec2(0x331e0c00u, 0x00003333u), - uvec2(0x7f7f3600u, 0x0000636bu), - uvec2(0x1c366300u, 0x00006336u), - uvec2(0x333e301fu, 0x00003333u), - uvec2(0x0c263f00u, 0x00003f19u), - uvec2(0x0c0c3800u, 0x380c0c07u), - uvec2(0x18181800u, 0x18181800u), - uvec2(0x0c0c0700u, 0x070c0c38u), - uvec2(0x00000000u, 0x6e3b0000u), - uvec2(0x00000000u, 0x00000000u)); + uvec2 ascii_bitmap[96] = uint2_array(uvec2(0x00000000u, 0x00000000u), + uvec2(0x18001800u, 0x183c3c18u), + uvec2(0x00000000u, 0x36360000u), + 
uvec2(0x7f363600u, 0x36367f36u), + uvec2(0x301f0c00u, 0x0c3e031eu), + uvec2(0x0c666300u, 0x00633318u), + uvec2(0x3b336e00u, 0x1c361c6eu), + uvec2(0x00000000u, 0x06060300u), + uvec2(0x060c1800u, 0x180c0606u), + uvec2(0x180c0600u, 0x060c1818u), + uvec2(0x3c660000u, 0x00663cffu), + uvec2(0x0c0c0000u, 0x000c0c3fu), + uvec2(0x000c0c06u, 0x00000000u), + uvec2(0x00000000u, 0x0000003fu), + uvec2(0x000c0c00u, 0x00000000u), + uvec2(0x06030100u, 0x6030180cu), + uvec2(0x6f673e00u, 0x3e63737bu), + uvec2(0x0c0c3f00u, 0x0c0e0c0cu), + uvec2(0x06333f00u, 0x1e33301cu), + uvec2(0x30331e00u, 0x1e33301cu), + uvec2(0x7f307800u, 0x383c3633u), + uvec2(0x30331e00u, 0x3f031f30u), + uvec2(0x33331e00u, 0x1c06031fu), + uvec2(0x0c0c0c00u, 0x3f333018u), + uvec2(0x33331e00u, 0x1e33331eu), + uvec2(0x30180e00u, 0x1e33333eu), + uvec2(0x000c0c00u, 0x000c0c00u), + uvec2(0x000c0c06u, 0x000c0c00u), + uvec2(0x060c1800u, 0x180c0603u), + uvec2(0x003f0000u, 0x00003f00u), + uvec2(0x180c0600u, 0x060c1830u), + uvec2(0x0c000c00u, 0x1e333018u), + uvec2(0x7b031e00u, 0x3e637b7bu), + uvec2(0x3f333300u, 0x0c1e3333u), + uvec2(0x66663f00u, 0x3f66663eu), + uvec2(0x03663c00u, 0x3c660303u), + uvec2(0x66361f00u, 0x1f366666u), + uvec2(0x16467f00u, 0x7f46161eu), + uvec2(0x16060f00u, 0x7f46161eu), + uvec2(0x73667c00u, 0x3c660303u), + uvec2(0x33333300u, 0x3333333fu), + uvec2(0x0c0c1e00u, 0x1e0c0c0cu), + uvec2(0x33331e00u, 0x78303030u), + uvec2(0x36666700u, 0x6766361eu), + uvec2(0x46667f00u, 0x0f060606u), + uvec2(0x6b636300u, 0x63777f7fu), + uvec2(0x73636300u, 0x63676f7bu), + uvec2(0x63361c00u, 0x1c366363u), + uvec2(0x06060f00u, 0x3f66663eu), + uvec2(0x3b1e3800u, 0x1e333333u), + uvec2(0x36666700u, 0x3f66663eu), + uvec2(0x38331e00u, 0x1e33070eu), + uvec2(0x0c0c1e00u, 0x3f2d0c0cu), + uvec2(0x33333f00u, 0x33333333u), + uvec2(0x331e0c00u, 0x33333333u), + uvec2(0x7f776300u, 0x6363636bu), + uvec2(0x1c366300u, 0x6363361cu), + uvec2(0x0c0c1e00u, 0x3333331eu), + uvec2(0x4c667f00u, 0x7f633118u), + uvec2(0x06061e00u, 0x1e060606u), + uvec2(0x30604000u, 0x03060c18u), + uvec2(0x18181e00u, 0x1e181818u), + uvec2(0x00000000u, 0x081c3663u), + uvec2(0x000000ffu, 0x00000000u), + uvec2(0x00000000u, 0x0c0c1800u), + uvec2(0x3e336e00u, 0x00001e30u), + uvec2(0x66663b00u, 0x0706063eu), + uvec2(0x03331e00u, 0x00001e33u), + uvec2(0x33336e00u, 0x3830303eu), + uvec2(0x3f031e00u, 0x00001e33u), + uvec2(0x06060f00u, 0x1c36060fu), + uvec2(0x333e301fu, 0x00006e33u), + uvec2(0x66666700u, 0x0706366eu), + uvec2(0x0c0c1e00u, 0x0c000e0cu), + uvec2(0x3033331eu, 0x30003030u), + uvec2(0x1e366700u, 0x07066636u), + uvec2(0x0c0c1e00u, 0x0e0c0c0cu), + uvec2(0x7f6b6300u, 0x0000337fu), + uvec2(0x33333300u, 0x00001f33u), + uvec2(0x33331e00u, 0x00001e33u), + uvec2(0x663e060fu, 0x00003b66u), + uvec2(0x333e3078u, 0x00006e33u), + uvec2(0x66060f00u, 0x00003b6eu), + uvec2(0x1e301f00u, 0x00003e03u), + uvec2(0x0c2c1800u, 0x080c3e0cu), + uvec2(0x33336e00u, 0x00003333u), + uvec2(0x331e0c00u, 0x00003333u), + uvec2(0x7f7f3600u, 0x0000636bu), + uvec2(0x1c366300u, 0x00006336u), + uvec2(0x333e301fu, 0x00003333u), + uvec2(0x0c263f00u, 0x00003f19u), + uvec2(0x0c0c3800u, 0x380c0c07u), + uvec2(0x18181800u, 0x18181800u), + uvec2(0x0c0c0700u, 0x070c0c38u), + uvec2(0x00000000u, 0x6e3b0000u), + uvec2(0x00000000u, 0x00000000u)); if (any(lessThan(bitmap_position, uvec2(0))) || any(greaterThanEqual(bitmap_position, uvec2(8)))) { diff --git a/source/blender/draw/intern/shaders/draw_model_lib.glsl b/source/blender/draw/intern/shaders/draw_model_lib.glsl index 73a01a0f474..5dac14408cb 100644 --- 
a/source/blender/draw/intern/shaders/draw_model_lib.glsl +++ b/source/blender/draw/intern/shaders/draw_model_lib.glsl @@ -53,11 +53,11 @@ mat4x4 drw_modelinv() */ mat3x3 drw_normat() { - return transpose(mat3x3(drw_modelinv())); + return transpose(to_float3x3(drw_modelinv())); } mat3x3 drw_norinv() { - return transpose(mat3x3(drw_modelmat())); + return transpose(to_float3x3(drw_modelmat())); } /* -------------------------------------------------------------------- */ @@ -77,11 +77,11 @@ vec3 drw_normal_world_to_object(vec3 N) vec3 drw_normal_object_to_view(vec3 lN) { - return (mat3x3(drw_view.viewmat) * (drw_normat() * lN)); + return (to_float3x3(drw_view.viewmat) * (drw_normat() * lN)); } vec3 drw_normal_view_to_object(vec3 vN) { - return (drw_norinv() * (mat3x3(drw_view.viewinv) * vN)); + return (drw_norinv() * (to_float3x3(drw_view.viewinv) * vN)); } /** \} */ diff --git a/source/blender/draw/intern/shaders/draw_view_lib.glsl b/source/blender/draw/intern/shaders/draw_view_lib.glsl index 2063de31038..be2896827bb 100644 --- a/source/blender/draw/intern/shaders/draw_view_lib.glsl +++ b/source/blender/draw/intern/shaders/draw_view_lib.glsl @@ -108,12 +108,12 @@ float drw_ndc_to_screen(float ndc_P) vec3 drw_normal_view_to_world(vec3 vN) { - return (mat3x3(drw_view.viewinv) * vN); + return (to_float3x3(drw_view.viewinv) * vN); } vec3 drw_normal_world_to_view(vec3 N) { - return (mat3x3(drw_view.viewmat) * N); + return (to_float3x3(drw_view.viewmat) * N); } /** \} */ diff --git a/source/blender/gpu/GPU_shader.hh b/source/blender/gpu/GPU_shader.hh index 6aa3d99de2e..dc4a4767147 100644 --- a/source/blender/gpu/GPU_shader.hh +++ b/source/blender/gpu/GPU_shader.hh @@ -41,6 +41,11 @@ struct GPUShader; */ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info); +/** + * Same as GPU_shader_create_from_info but will run the preprocessor on the source strings. + */ +GPUShader *GPU_shader_create_from_info_python(const GPUShaderCreateInfo *_info); + /** * Create a shader using a named #GPUShaderCreateInfo registered at startup. * These are declared inside `*_info.hh` files using the `GPU_SHADER_CREATE_INFO()` macro. diff --git a/source/blender/gpu/glsl_preprocess/CMakeLists.txt b/source/blender/gpu/glsl_preprocess/CMakeLists.txt index ad3f33e25e2..13ef8d96ea4 100644 --- a/source/blender/gpu/glsl_preprocess/CMakeLists.txt +++ b/source/blender/gpu/glsl_preprocess/CMakeLists.txt @@ -7,6 +7,8 @@ # Build `glsl_preprocess` executable. set(SRC glsl_preprocess.cc + + glsl_preprocess.hh ) # `SRC_DNA_INC` is defined in the parent directory. diff --git a/source/blender/gpu/glsl_preprocess/glsl_preprocess.cc b/source/blender/gpu/glsl_preprocess/glsl_preprocess.cc index 51386e6b420..dd524966029 100644 --- a/source/blender/gpu/glsl_preprocess/glsl_preprocess.cc +++ b/source/blender/gpu/glsl_preprocess/glsl_preprocess.cc @@ -1,4 +1,4 @@ -/* SPDX-FileCopyrightText: 2001-2002 NaN Holding BV. All rights reserved. 
+/* SPDX-FileCopyrightText: 2024 Blender Authors * * SPDX-License-Identifier: GPL-2.0-or-later */ @@ -6,11 +6,12 @@ * \ingroup glsl_preprocess */ -#include #include #include +#include #include -#include + +#include "glsl_preprocess.hh" int main(int argc, char **argv) { @@ -40,8 +41,29 @@ bool first_comment = true; bool inside_comment = false; + int error = 0; + size_t line_index = 0; + + auto report_error = + [&](const std::string &src_line, const std::smatch &match, const char *err_msg) { + size_t err_line = line_index; + size_t err_char = match.position(); + + std::cerr << input_file_name; + std::cerr << ':' << std::to_string(err_line) << ':' << std::to_string(err_char); + std::cerr << ": error: " << err_msg << std::endl; + std::cerr << src_line << std::endl; + std::cerr << std::string(err_char, ' ') << '^' << std::endl; + + error++; + }; + + blender::gpu::shader::Preprocessor processor(report_error); + std::string line; while (std::getline(input_file, line)) { + line_index++; + /* Remove licence headers (first comment). */ if (line.rfind("/*", 0) == 0 && first_comment) { first_comment = false; @@ -55,17 +77,17 @@ } if (skip_line) { - line = ""; + output_file << "\n"; } - else if (line.rfind("#include ", 0) == 0 || line.rfind("#pragma once", 0) == 0) { - line[0] = line[1] = '/'; + else { + processor << line << '\n'; } - - output_file << line << "\n"; } + output_file << processor.str(); + input_file.close(); output_file.close(); - return 0; + return error; } diff --git a/source/blender/gpu/glsl_preprocess/glsl_preprocess.hh b/source/blender/gpu/glsl_preprocess/glsl_preprocess.hh new file mode 100644 index 00000000000..d9642e13ddb --- /dev/null +++ b/source/blender/gpu/glsl_preprocess/glsl_preprocess.hh @@ -0,0 +1,219 @@ +/* SPDX-FileCopyrightText: 2024 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup glsl_preprocess + */ + +#pragma once + +#include <functional> +#include <regex> +#include <sstream> +#include <string> +#include <vector> + +namespace blender::gpu::shader { + +/** + * Shader source preprocessor that allows mutating GLSL into cross-API source that can be + * interpreted by the different GPU backends. Some syntax is mutated or reported as incompatible. + * + * Implementation speed is not a huge concern as we only apply this at compile time or on Python + * shader sources. 
+ */
+template<typename T, bool no_linting = false> class Preprocessor {
+  T &report_error;
+
+  struct SharedVar {
+    std::string type;
+    std::string name;
+    std::string array;
+  };
+  std::vector<SharedVar> shared_vars_;
+
+  std::stringstream output_;
+
+ public:
+  Preprocessor(T &error_cb) : report_error(error_cb) {}
+
+  Preprocessor &operator<<(std::string str)
+  {
+    threadgroup_variable_parsing(str);
+    matrix_constructor_linting(str);
+    array_constructor_linting(str);
+    str = preprocessor_directive_mutation(str);
+    str = argument_decorator_macro_injection(str);
+    str = array_constructor_macro_injection(str);
+    output_ << str;
+    return *this;
+  }
+
+  Preprocessor &operator<<(char c)
+  {
+    output_ << c;
+    return *this;
+  }
+
+  std::string str()
+  {
+    return output_.str() + suffix();
+  }
+
+ private:
+  std::string preprocessor_directive_mutation(const std::string &str)
+  {
+    /* Example: `#include "deps.glsl"` > `//include "deps.glsl"` */
+    std::regex regex("#\\s*(include|pragma once)");
+    return std::regex_replace(str, regex, "//$1");
+  }
+
+  void threadgroup_variable_parsing(std::string str)
+  {
+    std::regex regex("shared\\s+(\\w+)\\s+(\\w+)([^;]*);");
+    for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
+      shared_vars_.push_back({match[1].str(), match[2].str(), match[3].str()});
+    }
+  }
+
+  std::string argument_decorator_macro_injection(const std::string &str)
+  {
+    /* Example: `out float var[2]` > `out float _out_sta var _out_end[2]` */
+    std::regex regex("(out|inout|in|shared)\\s+(\\w+)\\s+(\\w+)");
+    return std::regex_replace(str, regex, "$1 $2 _$1_sta $3 _$1_end");
+  }
+
+  std::string array_constructor_macro_injection(const std::string &str)
+  {
+    /* Example: `= float[2](0.0, 0.0)` > `= ARRAY_T(float) ARRAY_V(0.0, 0.0)` */
+    std::regex regex("=\\s*(\\w+)\\s*\\[[^\\]]*\\]\\s*\\(");
+    return std::regex_replace(str, regex, "= ARRAY_T($1) ARRAY_V(");
+  }
+
+  /* TODO(fclem): Too many false positives and false negatives to be applied to python shaders. */
+  void matrix_constructor_linting(std::string str)
+  {
+    if constexpr (no_linting) {
+      return;
+    }
+    /* Example: `mat4(other_mat)`. */
+    std::regex regex("\\s+(mat(\\d|\\dx\\d)|float\\dx\\d)\\([^,\\s\\d]+\\)");
+    for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
+      /* This only catches some invalid usage. For the rest, the CI will catch them. */
+      const char *msg =
+          "Matrix constructor is not cross API compatible. "
+          "Use to_floatNxM to reshape the matrix or use other constructors instead.";
+      report_error(str, match, msg);
+    }
+  }
+
+  void array_constructor_linting(std::string str)
+  {
+    if constexpr (no_linting) {
+      return;
+    }
+    std::regex regex("=\\s*(\\w+)\\s*\\[[^\\]]*\\]\\s*\\(");
+    for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
+      /* This only catches some invalid usage. For the rest, the CI will catch them. */
+      const char *msg =
+          "Array constructor is not cross API compatible. Use type_array instead of type[].";
+      report_error(str, match, msg);
+    }
+  }
+
+  std::string suffix()
+  {
+    if (shared_vars_.empty()) {
+      return "";
+    }
+
+    std::stringstream suffix;
+    /**
+     * For Metal shaders to compile, shared (threadgroup) variables cannot be declared globally.
+     * They must reside within a function scope. Hence, we need to extract these declarations and
+     * generate shared memory blocks within the entry point function. These shared memory blocks
+     * can then be passed as references to the remaining shader via the class function scope.
+ * + * The shared variable definitions from the source file are replaced with references to + * threadgroup memory blocks (using _shared_sta and _shared_end macros), but kept in-line in + * case external macros are used to declare the dimensions. + * + * Each part of the codegen is stored inside macros so that we don't have to do string + * replacement at runtime. + */ + /* Arguments of the wrapper class constructor. */ + suffix << "#undef MSL_SHARED_VARS_ARGS\n"; + /* References assignment inside wrapper class constructor. */ + suffix << "#undef MSL_SHARED_VARS_ASSIGN\n"; + /* Declaration of threadgroup variables in entry point function. */ + suffix << "#undef MSL_SHARED_VARS_DECLARE\n"; + /* Arguments for wrapper class constructor call. */ + suffix << "#undef MSL_SHARED_VARS_PASS\n"; + + /** + * Example replacement: + * + * ` + * // Source + * shared float bar[10]; // Source declaration. + * shared float foo; // Source declaration. + * // Rest of the source ... + * // End of Source + * + * // Backend Output + * class Wrapper { // Added at runtime by backend. + * + * threadgroup float (&foo); // Replaced by regex and macros. + * threadgroup float (&bar)[10]; // Replaced by regex and macros. + * // Rest of the source ... + * + * Wrapper ( // Added at runtime by backend. + * threadgroup float (&_foo), threadgroup float (&_bar)[10] // MSL_SHARED_VARS_ARGS + * ) // Added at runtime by backend. + * : foo(_foo), bar(_bar) // MSL_SHARED_VARS_ASSIGN + * {} // Added at runtime by backend. + * + * }; // End of Wrapper // Added at runtime by backend. + * + * kernel entry_point() { // Added at runtime by backend. + * + * threadgroup float foo; // MSL_SHARED_VARS_DECLARE + * threadgroup float bar[10] // MSL_SHARED_VARS_DECLARE + * + * Wrapper wrapper // Added at runtime by backend. + * (foo, bar) // MSL_SHARED_VARS_PASS + * ; // Added at runtime by backend. + * + * } // Added at runtime by backend. + * // End of Backend Output + * ` + */ + std::stringstream args, assign, declare, pass; + + bool first = true; + for (SharedVar &var : shared_vars_) { + char sep = first ? ' ' : ','; + /* */ + args << sep << "threadgroup " << var.type << "(&_" << var.name << ")" << var.array; + assign << (first ? 
':' : ',') << var.name << "(_" << var.name << ")";
+      declare << "threadgroup " << var.type << ' ' << var.name << var.array << ";";
+      pass << sep << var.name;
+      first = false;
+    }
+
+    suffix << "#define MSL_SHARED_VARS_ARGS " << args.str() << "\n";
+    suffix << "#define MSL_SHARED_VARS_ASSIGN " << assign.str() << "\n";
+    suffix << "#define MSL_SHARED_VARS_DECLARE " << declare.str() << "\n";
+    suffix << "#define MSL_SHARED_VARS_PASS (" << pass.str() << ")\n";
+
+    return suffix.str();
+  }
+};
+
+template<typename T> class PreprocessorPython : public Preprocessor<T, true> {
+ public:
+  PreprocessorPython(T &error_cb) : Preprocessor<T, true>(error_cb){};
+};
+
+}  // namespace blender::gpu::shader
diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc
index 119f4d3b492..1884d822d9b 100644
--- a/source/blender/gpu/intern/gpu_shader.cc
+++ b/source/blender/gpu/intern/gpu_shader.cc
@@ -17,6 +17,8 @@
 #include "GPU_matrix.hh"
 #include "GPU_platform.hh"
 
+#include "glsl_preprocess/glsl_preprocess.hh"
+
 #include "gpu_backend.hh"
 #include "gpu_context_private.hh"
 #include "gpu_shader_create_info.hh"
@@ -299,6 +301,40 @@ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
   return wrap(Context::get()->compiler->compile(info, false));
 }
 
+static std::string preprocess_source(StringRefNull original)
+{
+  auto no_err_report = [](std::string, std::smatch, const char *) {};
+  gpu::shader::PreprocessorPython processor(no_err_report);
+  processor << std::string(original);
+  return processor.str();
+};
+
+GPUShader *GPU_shader_create_from_info_python(const GPUShaderCreateInfo *_info)
+{
+  using namespace blender::gpu::shader;
+  ShaderCreateInfo &info = *const_cast<ShaderCreateInfo *>(
+      reinterpret_cast<const ShaderCreateInfo *>(_info));
+
+  std::string vertex_source_original = info.vertex_source_generated;
+  std::string fragment_source_original = info.fragment_source_generated;
+  std::string geometry_source_original = info.geometry_source_generated;
+  std::string compute_source_original = info.compute_source_generated;
+
+  info.vertex_source_generated = preprocess_source(info.vertex_source_generated);
+  info.fragment_source_generated = preprocess_source(info.fragment_source_generated);
+  info.geometry_source_generated = preprocess_source(info.geometry_source_generated);
+  info.compute_source_generated = preprocess_source(info.compute_source_generated);
+
+  GPUShader *result = wrap(Context::get()->compiler->compile(info, false));
+
+  info.vertex_source_generated = vertex_source_original;
+  info.fragment_source_generated = fragment_source_original;
+  info.geometry_source_generated = geometry_source_original;
+  info.compute_source_generated = compute_source_original;
+
+  return result;
+}
+
 GPUShader *GPU_shader_create_from_python(const char *vertcode,
                                          const char *fragcode,
                                          const char *geomcode,
@@ -315,6 +351,28 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode,
     libcode = libcodecat = BLI_strdupcat(libcode, datatoc_gpu_shader_colorspace_lib_glsl);
   }
 
+  std::string vertex_source_processed;
+  std::string fragment_source_processed;
+  std::string geometry_source_processed;
+  std::string library_source_processed;
+
+  if (vertcode != nullptr) {
+    vertex_source_processed = preprocess_source(vertcode);
+    vertcode = vertex_source_processed.c_str();
+  }
+  if (fragcode != nullptr) {
+    fragment_source_processed = preprocess_source(fragcode);
+    fragcode = fragment_source_processed.c_str();
+  }
+  if (geomcode != nullptr) {
+    geometry_source_processed = preprocess_source(geomcode);
+    geomcode = geometry_source_processed.c_str();
+  }
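+  /* NOTE: Python-supplied sources go through the same preprocessor as built-in shaders, but
+   * without the cross-API linting (see `PreprocessorPython` above), which produces too many
+   * false positives on arbitrary user GLSL (see the TODO in `glsl_preprocess.hh`). */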
+ if (libcode != nullptr) { + library_source_processed = preprocess_source(libcode); + libcode = library_source_processed.c_str(); + } + /* Use pyGPUShader as default name for shader. */ const char *shname = name != nullptr ? name : "pyGPUShader"; diff --git a/source/blender/gpu/intern/gpu_shader_dependency.cc b/source/blender/gpu/intern/gpu_shader_dependency.cc index c6f240100e8..cef010ff09e 100644 --- a/source/blender/gpu/intern/gpu_shader_dependency.cc +++ b/source/blender/gpu/intern/gpu_shader_dependency.cc @@ -484,7 +484,13 @@ struct GPUSource { int64_t keyword_cursor = 0; out_qualifier = keyword_parse(arg, keyword_cursor); out_type = keyword_parse(arg, keyword_cursor); + /* Skip qualifier prefix macro expanded by GLSL preprocessing (e.g. _out_sta). */ + StringRef qualifier_prefix = keyword_parse(arg, keyword_cursor); out_name = keyword_parse(arg, keyword_cursor); + + if (out_qualifier == "const") { + out_name = qualifier_prefix; + } if (out_name.is_empty()) { /* No qualifier case. */ out_name = out_type; @@ -1031,8 +1037,8 @@ struct GPUSource { GPUSource *dependency_source = nullptr; { - /* Include directive has been mangled on purpose. See `datatoc.cc`. */ - pos = source.find("//nclude \"", pos + 1); + /* Include directive has been mangled on purpose. See `glsl_preprocess.hh`. */ + pos = source.find("\n//include \"", pos + 1); if (pos == -1) { return 0; } diff --git a/source/blender/gpu/metal/mtl_shader_generator.hh b/source/blender/gpu/metal/mtl_shader_generator.hh index 5de82fe1ca2..9b9d7ec7ed5 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.hh +++ b/source/blender/gpu/metal/mtl_shader_generator.hh @@ -426,8 +426,6 @@ class MSLGeneratorInterface { blender::Vector vertex_output_varyings_tf; /* Clip Distances. */ blender::Vector clip_distances; - /* Shared Memory Blocks. */ - blender::Vector shared_memory_blocks; /* Max bind IDs. */ int max_tex_bind_index = 0; /** GL Global usage. */ diff --git a/source/blender/gpu/metal/mtl_shader_generator.mm b/source/blender/gpu/metal/mtl_shader_generator.mm index 18c0b13024a..701629f1236 100644 --- a/source/blender/gpu/metal/mtl_shader_generator.mm +++ b/source/blender/gpu/metal/mtl_shader_generator.mm @@ -182,26 +182,6 @@ static void remove_singleline_comments_func(std::string &str) } } -static bool is_program_word(const char *chr, int *len) -{ - int numchars = 0; - for (const char *c = chr; *c != '\0'; c++) { - char ch = *c; - /* NOTE: Hash (`#`) is not valid in var names, but is used by Closure macro patterns. */ - if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || - (numchars > 0 && ch >= '0' && ch <= '9') || ch == '_' || ch == '#') - { - numchars++; - } - else { - *len = numchars; - return (numchars > 0); - } - } - *len = numchars; - return true; -} - static int backwards_program_word_scan(const char *array_loc, const char *min) { const char *start; @@ -267,130 +247,6 @@ static void extract_and_replace_clipping_distances(std::string &vertex_source, } } -/** - * Replace function parameter patterns containing: - * `out vec3 somevar` with `THD vec3&somevar`. - * which enables pass by reference via resolved macro: - * `thread vec3& somevar`. 
- */ -static void replace_outvars(std::string &str) -{ - char *current_str_begin = &*str.begin(); - char *current_str_end = &*str.end(); - - for (char *c = current_str_begin + 2; c < current_str_end - 6; c++) { - char *start = strstr(c, "out "); - if (start == nullptr) { - return; - } - else { - c = start; - if (strncmp(c - 2, "in", 2) == 0) { - start = c - 2; - } - - /* Check that the following are words. */ - int len1, len2; - char *word_base1 = c + 4; - char *word_base2 = word_base1; - - if (is_program_word(word_base1, &len1) && (*(word_base1 + len1) == ' ')) { - word_base2 = word_base1 + len1 + 1; - if (is_program_word(word_base2, &len2)) { - /* Match found. */ - bool is_array = (*(word_base2 + len2) == '['); - if (is_array) { - /* Generate out-variable pattern for arrays, of form - * `OUT(vec2,samples,CRYPTOMATTE_LEVELS_MAX)` - * replacing original `out vec2 samples[SAMPLE_LEN]` - * using 'OUT' macro declared in `mtl_shader_defines.msl`. */ - char *array_end = strchr(word_base2 + len2, ']'); - if (array_end != nullptr) { - *start = 'O'; - *(start + 1) = 'U'; - *(start + 2) = 'T'; - *(start + 3) = '('; - for (char *clear = start + 4; clear < c + 4; clear++) { - *clear = ' '; - } - *(word_base2 - 1) = ','; - *(word_base2 + len2) = ','; - *array_end = ')'; - } - } - else { - /* Generate out-variable pattern of form `THD type&var` from original `out vec4 var`. - */ - *start = 'T'; - *(start + 1) = 'H'; - *(start + 2) = 'D'; - for (char *clear = start + 3; clear < c + 4; clear++) { - *clear = ' '; - } - *(word_base2 - 1) = '&'; - } - } - } - } - } -} - -static void replace_matrix_constructors(std::string &str) -{ - - /* Replace matrix constructors with GLSL-compatible constructors for Metal. - * Base matrix constructors e.g. mat3x3 do not have as many overload variants as GLSL. - * To add compatibility, we declare custom constructors e.g. MAT3x3 in `mtl_shader_defines.msl`. - * If the GLSL syntax matches, we map mat3x3(..) -> MAT3x3(..) and implement a custom - * constructor. This supports both mat3(..) and mat3x3(..) style syntax. */ - char *current_str_begin = &*str.begin(); - char *current_str_end = &*str.end(); - - for (char *c = current_str_begin; c < current_str_end - 10; c++) { - char *base_scan = strstr(c, "mat"); - if (base_scan == nullptr) { - break; - } - /* Track end of constructor. */ - char *constructor_end = nullptr; - - /* check if next character is matrix dim. */ - c = base_scan + 3; - if (!(*c == '2' || *c == '3' || *c == '4')) { - /* Not constructor, skip. */ - continue; - } - - /* Possible multiple dimensional matrix constructor. Verify if next char is a dim. */ - c++; - if (*c == 'x') { - c++; - if (*c == '2' || *c == '3' || *c == '4') { - c++; - } - else { - /* Not matrix constructor, continue. */ - continue; - } - } - - /* Check for constructor opening brace. */ - if (*c == '(') { - constructor_end = c; - } - else { - /* Not matrix constructor, continue. */ - continue; - } - - /* If is constructor, replace with MATN(..) syntax. */ - if (constructor_end != nullptr) { - ARRAY_SET_ITEMS(base_scan, 'M', 'A', 'T'); - continue; - } - } -} - static void replace_array_initializers_func(std::string &str) { char *current_str_begin = &*str.begin(); @@ -572,135 +428,6 @@ static bool extract_ssbo_pragma_info(const MTLShader *shader, return false; } -/* Extract shared memory declaration and their parameters. - * Inserts extracted cases as entries in MSLGeneratorInterface's shared memory block - * list. 
These will later be used to generate shared memory declarations within the entry point. - * - * TODO(Metal/GPU): Move shared memory declarations to GPUShaderCreateInfo. This is currently a - * necessary workaround to match GLSL functionality and enable full compute shader support. In the - * long term, best to avoid needing to perform this operation. */ -void extract_shared_memory_blocks(MSLGeneratorInterface &msl_iface, - std::string &glsl_compute_source) -{ - msl_iface.shared_memory_blocks.clear(); - char *current_str_begin = &*glsl_compute_source.begin(); - char *current_str_end = &*glsl_compute_source.end(); - - for (char *c = current_str_begin; c < current_str_end - 6; c++) { - /* Find first instance of "shared ". */ - char *c_expr_start = strstr(c, "shared "); - if (c_expr_start == nullptr) { - break; - } - /* Check if "shared" was part of a previous word. If so, this is not valid. */ - if (next_word_in_range(c_expr_start - 1, c_expr_start) != nullptr) { - c += 7; /* Jump forward by length of "shared ". */ - continue; - } - - /* Jump to shared declaration and detect end of statement. */ - c = c_expr_start; - char *c_expr_end = strstr(c, ";"); - if (c_expr_end == nullptr) { - break; - } - - /* Prepare MSLSharedMemoryBlock instance. */ - MSLSharedMemoryBlock new_shared_block; - char buf[256]; - - /* Read type-name. */ - c += 7; /* Jump forward by length of "shared ". */ - c = next_word_in_range(c, c_expr_end); - if (c == nullptr) { - c = c_expr_end + 1; - continue; - } - - char *c_next_space = next_symbol_in_range(c, c_expr_end, ' '); - if (c_next_space == nullptr) { - c = c_expr_end + 1; - continue; - } - int len = c_next_space - c; - BLI_assert(len < 256); - BLI_strncpy(buf, c, len + 1); - new_shared_block.type_name = std::string(buf); - - /* Read var-name. - * `varname` can either come right before the final semi-colon, or - * with following array syntax. - * spaces may exist before closing symbol. */ - c = c_next_space + 1; - c = next_word_in_range(c, c_expr_end); - if (c == nullptr) { - c = c_expr_end + 1; - continue; - } - - char *c_array_begin = next_symbol_in_range(c, c_expr_end, '['); - c_next_space = next_symbol_in_range(c, c_expr_end, ' '); - - char *varname_end = nullptr; - if (c_array_begin != nullptr) { - /* Array path. */ - if (c_next_space != nullptr) { - varname_end = (c_next_space < c_array_begin) ? c_next_space : c_array_begin; - } - else { - varname_end = c_array_begin; - } - new_shared_block.is_array = true; - } - else { - /* Ending semi-colon. */ - if (c_next_space != nullptr) { - varname_end = (c_next_space < c_expr_end) ? c_next_space : c_expr_end; - } - else { - varname_end = c_expr_end; - } - new_shared_block.is_array = false; - } - len = varname_end - c; - BLI_assert(len < 256); - BLI_strncpy(buf, c, len + 1); - new_shared_block.varname = std::string(buf); - - /* Determine if array. */ - if (new_shared_block.is_array) { - int len = c_expr_end - c_array_begin; - BLI_strncpy(buf, c_array_begin, len + 1); - new_shared_block.array_decl = std::string(buf); - } - - /* Shared block is valid, add it to the list and replace declaration with class member. - * reference. This declaration needs to have one of the formats: - * TG int& varname; - * TG int (&varname)[len][len] - * - * In order to fit in the same space, replace `threadgroup` with `TG` macro. - */ - for (char *c = c_expr_start; c <= c_expr_end; c++) { - *c = ' '; - } - std::string out_str = "TG "; - out_str += new_shared_block.type_name; - out_str += (new_shared_block.is_array) ? 
"(&" : "&"; - out_str += new_shared_block.varname; - if (new_shared_block.is_array) { - out_str += ")" + new_shared_block.array_decl; - } - out_str += ";;"; - memcpy(c_expr_start, out_str.c_str(), (out_str.length() - 1) * sizeof(char)); - - /* Jump to end of statement. */ - c = c_expr_end + 1; - - msl_iface.shared_memory_blocks.append(new_shared_block); - } -} - /** \} */ /* -------------------------------------------------------------------- */ @@ -973,11 +700,9 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) } /* Special condition - mat3 and array constructor replacement. */ - replace_matrix_constructors(shd_builder_->glsl_vertex_source_); replace_array_initializers_func(shd_builder_->glsl_vertex_source_); if (!msl_iface.uses_transform_feedback) { - replace_matrix_constructors(shd_builder_->glsl_fragment_source_); replace_array_initializers_func(shd_builder_->glsl_fragment_source_); } @@ -1058,12 +783,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) /* Extract gl_ClipDistances. */ extract_and_replace_clipping_distances(shd_builder_->glsl_vertex_source_, msl_iface); - /* Replace 'out' attribute on function parameters with pass-by-reference. */ - replace_outvars(shd_builder_->glsl_vertex_source_); - if (!msl_iface.uses_transform_feedback) { - replace_outvars(shd_builder_->glsl_fragment_source_); - } - /**** METAL Shader source generation. ****/ /* Setup `stringstream` for populating generated MSL shader vertex/frag shaders. */ std::stringstream ss_vertex; @@ -1252,7 +971,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info) /* Add Texture members. * These members pack both a texture and a sampler into a single * struct, as both are needed within texture functions. - * e.g. `_mtl_combined_image_sampler_2d` + * e.g. `_mtl_sampler_2d` * The exact typename is generated inside `get_msl_typestring_wrapper()`. */ for (const MSLTextureResource &tex : msl_iface.texture_samplers) { if (bool(tex.stage & ShaderStage::VERTEX)) { @@ -1485,7 +1204,6 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i BLI_assert(shd_builder_->glsl_compute_source_.size() > 0); /*** Source cleanup. ***/ - replace_matrix_constructors(shd_builder_->glsl_compute_source_); replace_array_initializers_func(shd_builder_->glsl_compute_source_); /**** Extract usage of GL globals. ****/ @@ -1526,19 +1244,6 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i remove_multiline_comments_func(shd_builder_->glsl_compute_source_); remove_singleline_comments_func(shd_builder_->glsl_compute_source_); - /** Extract usage of shared memory. - * For Metal shaders to compile, shared (threadgroup) memory cannot be declared globally. - * It must reside within a function scope. Hence, we need to extract these uses and generate - * shared memory blocks within the entry point function, which can then be passed as references - * to the remaining shader via the class function scope. - * - * The existing block definitions are then replaced with references to threadgroup memory blocks, - * but kept in-line in case external macros are used to declare the dimensions. */ - extract_shared_memory_blocks(msl_iface, shd_builder_->glsl_compute_source_); - - /* Replace 'out' attribute on function parameters with pass-by-reference. */ - replace_outvars(shd_builder_->glsl_compute_source_); - /** Generate Compute shader stage. 
**/ std::stringstream ss_compute; ss_compute << "#line 1 \"msl_wrapper_code\"\n"; @@ -1599,7 +1304,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i /* Add Texture members. * These members pack both a texture and a sampler into a single * struct, as both are needed within texture functions. - * e.g. `_mtl_combined_image_sampler_2d` + * e.g. `_mtl_sampler_2d` * The exact typename is generated inside `get_msl_typestring_wrapper()`. */ for (const MSLTextureResource &tex : msl_iface.texture_samplers) { if (bool(tex.stage & ShaderStage::COMPUTE)) { @@ -1631,37 +1336,8 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i /* Compute constructor for Shared memory blocks, as we must pass * local references from entry-point function scope into the class * instantiation. */ - ss_compute << get_stage_class_name(ShaderStage::COMPUTE) << "("; - bool first = true; - if (msl_iface.shared_memory_blocks.size() > 0) { - for (const MSLSharedMemoryBlock &block : msl_iface.shared_memory_blocks) { - if (!first) { - ss_compute << ","; - } - if (block.is_array) { - ss_compute << "TG " << block.type_name << " (&_" << block.varname << ")" - << block.array_decl; - } - else { - ss_compute << "TG " << block.type_name << " &_" << block.varname; - } - ss_compute << std::endl; - first = false; - } - ss_compute << ") : "; - first = true; - for (const MSLSharedMemoryBlock &block : msl_iface.shared_memory_blocks) { - if (!first) { - ss_compute << ","; - } - ss_compute << block.varname << "(_" << block.varname << ")"; - first = false; - } - } - else { - ss_compute << ") "; - } - ss_compute << "{ }" << std::endl; + ss_compute << get_stage_class_name(ShaderStage::COMPUTE) + << "(MSL_SHARED_VARS_ARGS) MSL_SHARED_VARS_ASSIGN {}\n"; /* Class Closing Bracket to end shader global scope. */ ss_compute << "};" << std::endl; @@ -2440,28 +2116,9 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub() out << this->generate_msl_compute_inputs_string(); out << ") {" << std::endl << std::endl; - /* Generate Compute shader instance constructor. If shared memory blocks are used, - * these must be declared and then passed into the constructor. */ - std::string stage_instance_constructor = ""; - bool first = true; - if (shared_memory_blocks.size() > 0) { - stage_instance_constructor += "("; - for (const MSLSharedMemoryBlock &block : shared_memory_blocks) { - if (block.is_array) { - out << "TG " << block.type_name << " " << block.varname << block.array_decl << ";"; - } - else { - out << "TG " << block.type_name << " " << block.varname << ";"; - } - stage_instance_constructor += ((!first) ? "," : "") + block.varname; - first = false; - - out << std::endl; - } - stage_instance_constructor += ")"; - } + out << "MSL_SHARED_VARS_DECLARE\n"; out << "\t" << get_stage_class_name(ShaderStage::COMPUTE) << " " << shader_stage_inst_name - << stage_instance_constructor << ";" << std::endl; + << " MSL_SHARED_VARS_PASS;\n"; /* Copy global variables. */ /* Entry point parameters for gl Globals. */ @@ -3584,9 +3241,10 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_ if (tex_buf_id != -1) { MSLBufferBlock &ssbo = this->storage_blocks[tex_buf_id]; out << "\t" << get_shader_stage_instance_name(shader_stage) << "." 
- << this->texture_samplers[i].name << ".buffer = " << ssbo.name << ";" << std::endl; + << this->texture_samplers[i].name << ".atomic.buffer = " << ssbo.name << ";" + << std::endl; out << "\t" << get_shader_stage_instance_name(shader_stage) << "." - << this->texture_samplers[i].name << ".aligned_width = uniforms->" + << this->texture_samplers[i].name << ".atomic.aligned_width = uniforms->" << this->texture_samplers[i].name << "_metadata.w;" << std::endl; /* Buffer-backed 2D Array and 3D texture types are not natively supported so texture size @@ -3598,7 +3256,7 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_ ImageType::INT_3D_ATOMIC)) { out << "\t" << get_shader_stage_instance_name(shader_stage) << "." - << this->texture_samplers[i].name << ".texture_size = ushort3(uniforms->" + << this->texture_samplers[i].name << ".atomic.texture_size = ushort3(uniforms->" << this->texture_samplers[i].name << "_metadata.xyz);" << std::endl; } } @@ -4022,128 +3680,128 @@ std::string MSLTextureResource::get_msl_wrapper_type_str() const /* Add Types as needed. */ switch (this->type) { case ImageType::FLOAT_1D: { - return "_mtl_combined_image_sampler_1d"; + return "_mtl_sampler_1d"; } case ImageType::FLOAT_2D: { - return "_mtl_combined_image_sampler_2d"; + return "_mtl_sampler_2d"; } case ImageType::FLOAT_3D: { - return "_mtl_combined_image_sampler_3d"; + return "_mtl_sampler_3d"; } case ImageType::FLOAT_CUBE: { - return "_mtl_combined_image_sampler_cube"; + return "_mtl_sampler_cube"; } case ImageType::FLOAT_1D_ARRAY: { - return "_mtl_combined_image_sampler_1d_array"; + return "_mtl_sampler_1d_array"; } case ImageType::FLOAT_2D_ARRAY: { - return "_mtl_combined_image_sampler_2d_array"; + return "_mtl_sampler_2d_array"; } case ImageType::FLOAT_CUBE_ARRAY: { - return "_mtl_combined_image_sampler_cube_array"; + return "_mtl_sampler_cube_array"; } case ImageType::FLOAT_BUFFER: { - return "_mtl_combined_image_sampler_buffer"; + return "_mtl_sampler_buffer"; } case ImageType::DEPTH_2D: { - return "_mtl_combined_image_sampler_depth_2d"; + return "_mtl_sampler_depth_2d"; } case ImageType::SHADOW_2D: { - return "_mtl_combined_image_sampler_depth_2d"; + return "_mtl_sampler_depth_2d"; } case ImageType::DEPTH_2D_ARRAY: { - return "_mtl_combined_image_sampler_depth_2d_array"; + return "_mtl_sampler_depth_2d_array"; } case ImageType::SHADOW_2D_ARRAY: { - return "_mtl_combined_image_sampler_depth_2d_array"; + return "_mtl_sampler_depth_2d_array"; } case ImageType::DEPTH_CUBE: { - return "_mtl_combined_image_sampler_depth_cube"; + return "_mtl_sampler_depth_cube"; } case ImageType::SHADOW_CUBE: { - return "_mtl_combined_image_sampler_depth_cube"; + return "_mtl_sampler_depth_cube"; } case ImageType::DEPTH_CUBE_ARRAY: { - return "_mtl_combined_image_sampler_depth_cube_array"; + return "_mtl_sampler_depth_cube_array"; } case ImageType::SHADOW_CUBE_ARRAY: { - return "_mtl_combined_image_sampler_depth_cube_array"; + return "_mtl_sampler_depth_cube_array"; } case ImageType::INT_1D: { - return "_mtl_combined_image_sampler_1d"; + return "_mtl_sampler_1d"; } case ImageType::INT_2D: { - return "_mtl_combined_image_sampler_2d"; + return "_mtl_sampler_2d"; } case ImageType::INT_3D: { - return "_mtl_combined_image_sampler_3d"; + return "_mtl_sampler_3d"; } case ImageType::INT_CUBE: { - return "_mtl_combined_image_sampler_cube"; + return "_mtl_sampler_cube"; } case ImageType::INT_1D_ARRAY: { - return "_mtl_combined_image_sampler_1d_array"; + return "_mtl_sampler_1d_array"; } case 
ImageType::INT_2D_ARRAY: { - return "_mtl_combined_image_sampler_2d_array"; + return "_mtl_sampler_2d_array"; } case ImageType::INT_CUBE_ARRAY: { - return "_mtl_combined_image_sampler_cube_array"; + return "_mtl_sampler_cube_array"; } case ImageType::INT_BUFFER: { - return "_mtl_combined_image_sampler_buffer"; + return "_mtl_sampler_buffer"; } case ImageType::UINT_1D: { - return "_mtl_combined_image_sampler_1d"; + return "_mtl_sampler_1d"; } case ImageType::UINT_2D: { - return "_mtl_combined_image_sampler_2d"; + return "_mtl_sampler_2d"; } case ImageType::UINT_3D: { - return "_mtl_combined_image_sampler_3d"; + return "_mtl_sampler_3d"; } case ImageType::UINT_CUBE: { - return "_mtl_combined_image_sampler_cube"; + return "_mtl_sampler_cube"; } case ImageType::UINT_1D_ARRAY: { - return "_mtl_combined_image_sampler_1d_array"; + return "_mtl_sampler_1d_array"; } case ImageType::UINT_2D_ARRAY: { - return "_mtl_combined_image_sampler_2d_array"; + return "_mtl_sampler_2d_array"; } case ImageType::UINT_CUBE_ARRAY: { - return "_mtl_combined_image_sampler_cube_array"; + return "_mtl_sampler_cube_array"; } case ImageType::UINT_BUFFER: { - return "_mtl_combined_image_sampler_buffer"; + return "_mtl_sampler_buffer"; } /* If native texture atomics are unsupported, map types to fallback atomic structures which * contain a buffer pointer and metadata members for size and alignment. */ case ImageType::INT_2D_ATOMIC: case ImageType::UINT_2D_ATOMIC: { if (supports_native_atomics) { - return "_mtl_combined_image_sampler_2d"; + return "_mtl_sampler_2d"; } else { - return "_mtl_combined_image_sampler_2d_atomic_fallback"; + return "_mtl_sampler_2d_atomic"; } } case ImageType::INT_3D_ATOMIC: case ImageType::UINT_3D_ATOMIC: { if (supports_native_atomics) { - return "_mtl_combined_image_sampler_3d"; + return "_mtl_sampler_3d"; } else { - return "_mtl_combined_image_sampler_3d_atomic_fallback"; + return "_mtl_sampler_3d_atomic"; } } case ImageType::INT_2D_ARRAY_ATOMIC: case ImageType::UINT_2D_ARRAY_ATOMIC: { if (supports_native_atomics) { - return "_mtl_combined_image_sampler_2d_array"; + return "_mtl_sampler_2d_array"; } else { - return "_mtl_combined_image_sampler_2d_array_atomic_fallback"; + return "_mtl_sampler_2d_array_atomic"; } } default: { diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index e5885398a4e..1329916f9a1 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -427,24 +427,6 @@ static void detect_workarounds() { GLContext::unused_fb_slot_workaround = true; } - /* dFdx/dFdy calculation factors, those are dependent on driver. 
*/ - if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY) && strstr(version, "3.3.10750")) - { - GLContext::derivative_signs[0] = 1.0; - GLContext::derivative_signs[1] = -1.0; - } - else if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_WIN, GPU_DRIVER_ANY)) { - if (strstr(version, "4.0.0 - Build 10.18.10.3308") || - strstr(version, "4.0.0 - Build 9.18.10.3186") || - strstr(version, "4.0.0 - Build 9.18.10.3165") || - strstr(version, "3.1.0 - Build 9.17.10.3347") || - strstr(version, "3.1.0 - Build 9.17.10.4101") || - strstr(version, "3.3.0 - Build 8.15.10.2618")) - { - GLContext::derivative_signs[0] = -1.0; - GLContext::derivative_signs[1] = 1.0; - } - } /* Draw shader parameters are broken on Qualcomm Windows ARM64 devices * on Mesa version < 24.0.0 */ @@ -560,7 +542,6 @@ bool GLContext::texture_filter_anisotropic_support = false; bool GLContext::debug_layer_workaround = false; bool GLContext::unused_fb_slot_workaround = false; bool GLContext::generate_mipmap_workaround = false; -float GLContext::derivative_signs[2] = {1.0f, 1.0f}; void GLBackend::capabilities_init() { diff --git a/source/blender/gpu/opengl/gl_context.hh b/source/blender/gpu/opengl/gl_context.hh index d59ebab9903..ffecd248603 100644 --- a/source/blender/gpu/opengl/gl_context.hh +++ b/source/blender/gpu/opengl/gl_context.hh @@ -67,7 +67,6 @@ class GLContext : public Context { static bool debug_layer_workaround; static bool unused_fb_slot_workaround; static bool generate_mipmap_workaround; - static float derivative_signs[2]; /** VBO for missing vertex attribute binding. Avoid undefined behavior on some implementation. */ GLuint default_attr_vbo_; diff --git a/source/blender/gpu/opengl/gl_shader.cc b/source/blender/gpu/opengl/gl_shader.cc index 527d893c8e6..7a29d3bf684 100644 --- a/source/blender/gpu/opengl/gl_shader.cc +++ b/source/blender/gpu/opengl/gl_shader.cc @@ -1059,10 +1059,6 @@ static const char *glsl_patch_default_get() /* Array compatibility. */ ss << "#define gpu_Array(_type) _type[]\n"; - /* Derivative sign can change depending on implementation. */ - ss << "#define DFDX_SIGN " << std::setprecision(2) << GLContext::derivative_signs[0] << "\n"; - ss << "#define DFDY_SIGN " << std::setprecision(2) << GLContext::derivative_signs[1] << "\n"; - /* GLSL Backend Lib. 
*/ ss << datatoc_glsl_shader_defines_glsl; diff --git a/source/blender/gpu/shaders/common/gpu_shader_math_matrix_lib.glsl b/source/blender/gpu/shaders/common/gpu_shader_math_matrix_lib.glsl index ecc5fc7d0fd..fcb5e0c6b87 100644 --- a/source/blender/gpu/shaders/common/gpu_shader_math_matrix_lib.glsl +++ b/source/blender/gpu/shaders/common/gpu_shader_math_matrix_lib.glsl @@ -803,7 +803,7 @@ mat3x3 rotate(mat3x3 mat, EulerXYZ rotation) mat4x4 rotate(mat4x4 mat, AxisAngle rotation) { - mat4x4 result = mat4x4(rotate(mat3x3(mat), rotation)); + mat4x4 result = to_float4x4(rotate(to_float3x3(mat), rotation)); result[0][3] = mat[0][3]; result[1][3] = mat[1][3]; result[2][3] = mat[2][3]; @@ -815,7 +815,7 @@ mat4x4 rotate(mat4x4 mat, AxisAngle rotation) } mat4x4 rotate(mat4x4 mat, EulerXYZ rotation) { - mat4x4 result = mat4x4(rotate(mat3x3(mat), rotation)); + mat4x4 result = to_float4x4(rotate(to_float3x3(mat), rotation)); result[0][3] = mat[0][3]; result[1][3] = mat[1][3]; result[2][3] = mat[2][3]; @@ -992,38 +992,38 @@ mat3x3 from_rot_scale(AxisAngle rotation, vec3 scale) mat4x4 from_loc_rot(vec3 location, EulerXYZ rotation) { - mat4x4 ret = mat4x4(from_rotation(rotation)); + mat4x4 ret = to_float4x4(from_rotation(rotation)); ret[3].xyz = location; return ret; } mat4x4 from_loc_rot(vec3 location, Quaternion rotation) { - mat4x4 ret = mat4x4(from_rotation(rotation)); + mat4x4 ret = to_float4x4(from_rotation(rotation)); ret[3].xyz = location; return ret; } mat4x4 from_loc_rot(vec3 location, AxisAngle rotation) { - mat4x4 ret = mat4x4(from_rotation(rotation)); + mat4x4 ret = to_float4x4(from_rotation(rotation)); ret[3].xyz = location; return ret; } mat4x4 from_loc_rot_scale(vec3 location, EulerXYZ rotation, vec3 scale) { - mat4x4 ret = mat4x4(from_rot_scale(rotation, scale)); + mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale)); ret[3].xyz = location; return ret; } mat4x4 from_loc_rot_scale(vec3 location, Quaternion rotation, vec3 scale) { - mat4x4 ret = mat4x4(from_rot_scale(rotation, scale)); + mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale)); ret[3].xyz = location; return ret; } mat4x4 from_loc_rot_scale(vec3 location, AxisAngle rotation, vec3 scale) { - mat4x4 ret = mat4x4(from_rot_scale(rotation, scale)); + mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale)); ret[3].xyz = location; return ret; } @@ -1079,11 +1079,11 @@ EulerXYZ to_euler(mat3x3 mat, const bool normalized) } EulerXYZ to_euler(mat4x4 mat) { - return to_euler(mat3(mat)); + return to_euler(to_float3x3(mat)); } EulerXYZ to_euler(mat4x4 mat, const bool normalized) { - return to_euler(mat3(mat), normalized); + return to_euler(to_float3x3(mat), normalized); } Quaternion normalized_to_quat_fast(mat3 mat) @@ -1193,11 +1193,11 @@ Quaternion to_quaternion(mat3x3 mat, const bool normalized) } Quaternion to_quaternion(mat4x4 mat) { - return to_quaternion(mat3(mat)); + return to_quaternion(to_float3x3(mat)); } Quaternion to_quaternion(mat4x4 mat, const bool normalized) { - return to_quaternion(mat3(mat), normalized); + return to_quaternion(to_float3x3(mat), normalized); } vec3 to_scale(mat3x3 mat) @@ -1216,11 +1216,11 @@ vec3 to_scale(mat3x3 mat, const bool allow_negative_scale) } vec3 to_scale(mat4x4 mat) { - return to_scale(mat3(mat)); + return to_scale(to_float3x3(mat)); } vec3 to_scale(mat4x4 mat, const bool allow_negative_scale) { - return to_scale(mat3(mat), allow_negative_scale); + return to_scale(to_float3x3(mat), allow_negative_scale); } void to_rot_scale(mat3x3 mat, out EulerXYZ r_rotation, out vec3 r_scale) @@ 
-1265,7 +1265,7 @@ void to_rot_scale(mat3x3 mat, void to_loc_rot_scale(mat4x4 mat, out vec3 r_location, out EulerXYZ r_rotation, out vec3 r_scale) { r_location = mat[3].xyz; - to_rot_scale(mat3(mat), r_rotation, r_scale); + to_rot_scale(to_float3x3(mat), r_rotation, r_scale); } void to_loc_rot_scale(mat4x4 mat, out vec3 r_location, @@ -1274,12 +1274,12 @@ void to_loc_rot_scale(mat4x4 mat, const bool allow_negative_scale) { r_location = mat[3].xyz; - to_rot_scale(mat3(mat), r_rotation, r_scale, allow_negative_scale); + to_rot_scale(to_float3x3(mat), r_rotation, r_scale, allow_negative_scale); } void to_loc_rot_scale(mat4x4 mat, out vec3 r_location, out Quaternion r_rotation, out vec3 r_scale) { r_location = mat[3].xyz; - to_rot_scale(mat3(mat), r_rotation, r_scale); + to_rot_scale(to_float3x3(mat), r_rotation, r_scale); } void to_loc_rot_scale(mat4x4 mat, out vec3 r_location, @@ -1288,7 +1288,7 @@ void to_loc_rot_scale(mat4x4 mat, const bool allow_negative_scale) { r_location = mat[3].xyz; - to_rot_scale(mat3(mat), r_rotation, r_scale, allow_negative_scale); + to_rot_scale(to_float3x3(mat), r_rotation, r_scale, allow_negative_scale); } vec3 transform_point(mat3x3 mat, vec3 point) @@ -1308,7 +1308,7 @@ vec3 transform_direction(mat3x3 mat, vec3 direction) vec3 transform_direction(mat4x4 mat, vec3 direction) { - return mat3x3(mat) * direction; + return to_float3x3(mat) * direction; } vec2 project_point(mat3x3 mat, vec2 point) @@ -1423,7 +1423,7 @@ bool is_negative(mat3x3 mat) } bool is_negative(mat4x4 mat) { - return is_negative(mat3x3(mat)); + return is_negative(to_float3x3(mat)); } bool is_equal(mat2x2 a, mat2x2 b, float epsilon) @@ -1505,15 +1505,15 @@ bool is_uniformly_scaled(mat3x3 mat) bool is_orthogonal(mat4x4 mat) { - return is_orthogonal(mat3x3(mat)); + return is_orthogonal(to_float3x3(mat)); } bool is_orthonormal(mat4x4 mat) { - return is_orthonormal(mat3x3(mat)); + return is_orthonormal(to_float3x3(mat)); } bool is_uniformly_scaled(mat4x4 mat) { - return is_uniformly_scaled(mat3x3(mat)); + return is_uniformly_scaled(to_float3x3(mat)); } /* Returns true if each individual columns are unit scaled. Mainly for assert usage. */ diff --git a/source/blender/gpu/shaders/gpu_shader_2D_widget_base_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_widget_base_vert.glsl index 9241a1d00f7..e1e56f8f391 100644 --- a/source/blender/gpu/shaders/gpu_shader_2D_widget_base_vert.glsl +++ b/source/blender/gpu/shaders/gpu_shader_2D_widget_base_vert.glsl @@ -85,12 +85,15 @@ vec2 do_tria() float size = (tria2) ? -tria2Size : tria1Size; vec2 center = (tria2) ? tria2Center : tria1Center; - vec2 arrow_pos[4] = vec2[4](vec2(0.0, 0.6), vec2(0.6, 0.0), vec2(-0.6, 0.0), vec2(0.0, -0.6)); + vec2 arrow_pos[4] = float2_array( + vec2(0.0, 0.6), vec2(0.6, 0.0), vec2(-0.6, 0.0), vec2(0.0, -0.6)); /* Rotated uv space by 45deg and mirrored. */ - vec2 arrow_uvs[4] = vec2[4](vec2(0.0, 0.85), vec2(0.85, 0.85), vec2(0.0, 0.0), vec2(0.0, 0.85)); + vec2 arrow_uvs[4] = float2_array( + vec2(0.0, 0.85), vec2(0.85, 0.85), vec2(0.0, 0.0), vec2(0.0, 0.85)); - vec2 point_pos[4] = vec2[4](vec2(-1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, -1.0), vec2(1.0, 1.0)); - vec2 point_uvs[4] = vec2[4](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 0.0), vec2(1.0, 1.0)); + vec2 point_pos[4] = float2_array( + vec2(-1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, -1.0), vec2(1.0, 1.0)); + vec2 point_uvs[4] = float2_array(vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 0.0), vec2(1.0, 1.0)); /* We reuse the SDF round-box rendering of widget to render the tria shapes. 
* This means we do clever tricks to position the rectangle the way we want using diff --git a/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl b/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl index a6de1a0de02..14f66848d70 100644 --- a/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl +++ b/source/blender/gpu/shaders/gpu_shader_2D_widget_shadow_vert.glsl @@ -30,44 +30,44 @@ void main() { /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader * memory pressure. */ - const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0), - vec2(0.02, 0.805), - vec2(0.067, 0.617), - vec2(0.169, 0.45), - vec2(0.293, 0.293), - vec2(0.45, 0.169), - vec2(0.617, 0.076), - vec2(0.805, 0.02), - vec2(1.0, 0.0), - vec2(-1.0, 0.0), - vec2(-0.805, 0.02), - vec2(-0.617, 0.067), - vec2(-0.45, 0.169), - vec2(-0.293, 0.293), - vec2(-0.169, 0.45), - vec2(-0.076, 0.617), - vec2(-0.02, 0.805), - vec2(0.0, 1.0), - vec2(0.0, -1.0), - vec2(-0.02, -0.805), - vec2(-0.067, -0.617), - vec2(-0.169, -0.45), - vec2(-0.293, -0.293), - vec2(-0.45, -0.169), - vec2(-0.617, -0.076), - vec2(-0.805, -0.02), - vec2(-1.0, 0.0), - vec2(1.0, 0.0), - vec2(0.805, -0.02), - vec2(0.617, -0.067), - vec2(0.45, -0.169), - vec2(0.293, -0.293), - vec2(0.169, -0.45), - vec2(0.076, -0.617), - vec2(0.02, -0.805), - vec2(0.0, -1.0)); + const vec2 cornervec[36] = float2_array(vec2(0.0, 1.0), + vec2(0.02, 0.805), + vec2(0.067, 0.617), + vec2(0.169, 0.45), + vec2(0.293, 0.293), + vec2(0.45, 0.169), + vec2(0.617, 0.076), + vec2(0.805, 0.02), + vec2(1.0, 0.0), + vec2(-1.0, 0.0), + vec2(-0.805, 0.02), + vec2(-0.617, 0.067), + vec2(-0.45, 0.169), + vec2(-0.293, 0.293), + vec2(-0.169, 0.45), + vec2(-0.076, 0.617), + vec2(-0.02, 0.805), + vec2(0.0, 1.0), + vec2(0.0, -1.0), + vec2(-0.02, -0.805), + vec2(-0.067, -0.617), + vec2(-0.169, -0.45), + vec2(-0.293, -0.293), + vec2(-0.45, -0.169), + vec2(-0.617, -0.076), + vec2(-0.805, -0.02), + vec2(-1.0, 0.0), + vec2(1.0, 0.0), + vec2(0.805, -0.02), + vec2(0.617, -0.067), + vec2(0.45, -0.169), + vec2(0.293, -0.293), + vec2(0.169, -0.45), + vec2(0.076, -0.617), + vec2(0.02, -0.805), + vec2(0.0, -1.0)); - const vec2 center_offset[4] = vec2[4]( + const vec2 center_offset[4] = float2_array( vec2(1.0, 1.0), vec2(-1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0)); uint cflag = vflag & CNR_FLAG_RANGE; diff --git a/source/blender/gpu/shaders/gpu_shader_codegen_lib.glsl b/source/blender/gpu/shaders/gpu_shader_codegen_lib.glsl index f275668e3c5..8b79f051d30 100644 --- a/source/blender/gpu/shaders/gpu_shader_codegen_lib.glsl +++ b/source/blender/gpu/shaders/gpu_shader_codegen_lib.glsl @@ -338,8 +338,8 @@ vec3 dF_impl(vec3 v) void dF_branch(float fn, out vec2 result) { - result.x = DFDX_SIGN * dFdx(fn); - result.y = DFDY_SIGN * dFdy(fn); + result.x = dFdx(fn); + result.y = dFdy(fn); } #else @@ -349,10 +349,10 @@ int g_derivative_flag = 0; vec3 dF_impl(vec3 v) { if (g_derivative_flag > 0) { - return DFDX_SIGN * dFdx(v); + return dFdx(v); } else if (g_derivative_flag < 0) { - return DFDY_SIGN * dFdy(v); + return dFdy(v); } return vec3(0.0); } diff --git a/source/blender/gpu/shaders/gpu_shader_text_frag.glsl b/source/blender/gpu/shaders/gpu_shader_text_frag.glsl index 1cc9c6dda1a..180c84b388b 100644 --- a/source/blender/gpu/shaders/gpu_shader_text_frag.glsl +++ b/source/blender/gpu/shaders/gpu_shader_text_frag.glsl @@ -133,7 +133,7 @@ void main() /* 3x3 blur */ /* clang-format off */ - const float weights3x3[16] = float[16]( + const float weights3x3[16] = 
float_array(
       1.0, 2.0, 1.0, 0.0,
       2.0, 4.0, 2.0, 0.0,
       1.0, 2.0, 1.0, 0.0,
@@ -169,7 +169,7 @@ void main()
 
     /* 5x5 blur */
     /* clang-format off */
-    const float weights5x5[36] = float[36](
+    const float weights5x5[36] = float_array(
       1.0, 2.0, 2.0, 2.0, 1.0, 0.0,
       2.0, 5.0, 6.0, 5.0, 2.0, 0.0,
       2.0, 6.0, 8.0, 6.0, 2.0, 0.0,
diff --git a/source/blender/gpu/shaders/material/gpu_shader_material_transform_utils.glsl b/source/blender/gpu/shaders/material/gpu_shader_material_transform_utils.glsl
index aa33f4bf2a0..6a4ca2e2f10 100644
--- a/source/blender/gpu/shaders/material/gpu_shader_material_transform_utils.glsl
+++ b/source/blender/gpu/shaders/material/gpu_shader_material_transform_utils.glsl
@@ -7,45 +7,45 @@
 void normal_transform_object_to_world(vec3 vin, out vec3 vout)
 {
   /* Expansion of NormalMatrix. */
-  vout = vin * mat3(ModelMatrixInverse);
+  vout = vin * to_float3x3(ModelMatrixInverse);
 }
 
 void normal_transform_world_to_object(vec3 vin, out vec3 vout)
 {
   /* Expansion of NormalMatrixInverse. */
-  vout = vin * mat3(ModelMatrix);
+  vout = vin * to_float3x3(ModelMatrix);
 }
 
 void direction_transform_object_to_world(vec3 vin, out vec3 vout)
 {
-  vout = mat3x3(ModelMatrix) * vin;
+  vout = to_float3x3(ModelMatrix) * vin;
 }
 
 void direction_transform_object_to_view(vec3 vin, out vec3 vout)
 {
-  vout = mat3x3(ModelMatrix) * vin;
-  vout = mat3x3(ViewMatrix) * vout;
+  vout = to_float3x3(ModelMatrix) * vin;
+  vout = to_float3x3(ViewMatrix) * vout;
 }
 
 void direction_transform_view_to_world(vec3 vin, out vec3 vout)
 {
-  vout = mat3x3(ViewMatrixInverse) * vin;
+  vout = to_float3x3(ViewMatrixInverse) * vin;
 }
 
 void direction_transform_view_to_object(vec3 vin, out vec3 vout)
 {
-  vout = mat3x3(ViewMatrixInverse) * vin;
-  vout = mat3x3(ModelMatrixInverse) * vout;
+  vout = to_float3x3(ViewMatrixInverse) * vin;
+  vout = to_float3x3(ModelMatrixInverse) * vout;
 }
 
 void direction_transform_world_to_view(vec3 vin, out vec3 vout)
 {
-  vout = mat3x3(ViewMatrix) * vin;
+  vout = to_float3x3(ViewMatrix) * vin;
 }
 
 void direction_transform_world_to_object(vec3 vin, out vec3 vout)
 {
-  vout = mat3x3(ModelMatrixInverse) * vin;
+  vout = to_float3x3(ModelMatrixInverse) * vin;
 }
 
 void point_transform_object_to_world(vec3 vin, out vec3 vout)
diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
index c2bb0ead490..61d8c991895 100644
--- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
+++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl
@@ -13,13 +13,37 @@
 #pragma clang diagnostic ignored "-Wunused-variable"
 #pragma clang diagnostic ignored "-Wcomment"
 
+#define ENABLE_IF(cond) thread metal::enable_if_t<(cond)> * = nullptr
+
 /* Base instance with offsets. */
 #define gpu_BaseInstance gl_BaseInstanceARB
 #define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)
 
-/* derivative signs. */
-#define DFDX_SIGN 1.0
-#define DFDY_SIGN 1.0
+#ifdef MTL_WORKGROUP_SIZE_X
+/* Older Metal compiler versions don't treat vector component access as constexpr.
+ * We have to make a wrapper class for that, otherwise we cannot use WorkGroupSize for sizing
+ * threadgroup arrays. Note that this bug is not present in version 4.1 of the compiler.
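+ * e.g. (hypothetical usage sketch) `threadgroup float cache[gl_WorkGroupSize.x];` only
+ * compiles on those older versions when `.x` resolves through this constexpr wrapper.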
+ */
+struct mtl_WorkGroupSize {
+  union {
+    struct {
+      uint x, y, z;
+    };
+    uint2 xy;
+    uint3 xyz;
+  };
+
+  constexpr mtl_WorkGroupSize()
+      : x(MTL_WORKGROUP_SIZE_X), y(MTL_WORKGROUP_SIZE_Y), z(MTL_WORKGROUP_SIZE_Z)
+  {
+  }
+
+  constexpr inline operator uint3() const
+  {
+    return xyz;
+  }
+};
+# define gl_WorkGroupSize mtl_WorkGroupSize()
+#endif
 
 /* Type definitions. */
 /* int implicitly cast to bool in MSL. */
@@ -53,64 +77,9 @@ using bvec3 = bool3;
 using bvec4 = bool4;
 
 /* Compute decorators. */
-#define TG threadgroup
 #define barrier() \
   threadgroup_barrier(mem_flags::mem_threadgroup | mem_flags::mem_device | mem_flags::mem_texture)
 
-#ifdef MTL_USE_WORKGROUP_SIZE
-/* Compute work-group size. */
-struct constexp_uvec3 {
-  /* Type union to cover all syntax accessors:
-   * .x, .y, .z, .xy, .xyz
-   * Swizzle types invalid.*/
-  union {
-    struct {
-      uint x, y, z;
-    };
-    struct {
-      uint2 xy;
-    };
-    uint3 xyz;
-  };
-
-  constexpr constexp_uvec3(uint _x, uint _y, uint _z) : x(_x), y(_y), z(_z) {}
-  constexpr uint operator[](int i)
-  {
-    /* Note: Need to switch on each elem value as array accessor triggers
-     * non-constant sizing error. This will be statically evaluated at compile time. */
-    switch (i) {
-      case 0:
-        return x;
-      case 1:
-        return y;
-      case 2:
-        return z;
-      default:
-        return 0;
-    }
-  }
-  constexpr inline operator uint3() const
-  {
-    return xyz;
-  }
-  constexpr inline operator uint2() const
-  {
-    return xy;
-  }
-  constexpr inline operator uint() const
-  {
-    return x;
-  }
-};
-
-constexpr constexp_uvec3 __internal_workgroupsize_get()
-{
-  return constexp_uvec3(MTL_WORKGROUP_SIZE_X, MTL_WORKGROUP_SIZE_Y, MTL_WORKGROUP_SIZE_Z);
-}
-
-# define gl_WorkGroupSize __internal_workgroupsize_get()
-#endif
-
 /** Shader atomics:
  * In order to emulate GLSL-style atomic operations, wherein variables can be used within atomic
  * operations, even if they are not explicitly declared atomic, we can cast the pointer to atomic,
  *
 * NOTE: We cannot hoist the address space into the template declaration, so these must be declared
 * for each relevant address space. */
+#define ATOMIC_OP_EX(qualifier, glsl_op, mtl_op) \
+  template<typename T> T atomic##glsl_op(qualifier T &mem, T data) \
+  { \
+    return atomic_##mtl_op##_explicit((qualifier _atomic *)&mem, data, memory_order_relaxed); \
+  }
 
-/* Thread-group memory. */
-template<typename T> T atomicMax(threadgroup T &mem, T data)
-{
-  return atomic_fetch_max_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicMin(threadgroup T &mem, T data)
-{
-  return atomic_fetch_min_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicAdd(threadgroup T &mem, T data)
-{
-  return atomic_fetch_add_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicSub(threadgroup T &mem, T data)
-{
-  return atomic_fetch_sub_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicAnd(threadgroup T &mem, T data)
-{
-  return atomic_fetch_and_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicOr(threadgroup T &mem, T data)
-{
-  return atomic_fetch_or_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicXor(threadgroup T &mem, T data)
-{
-  return atomic_fetch_xor_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicExchange(threadgroup T &mem, T data)
-{
-  return atomic_exchange_explicit((threadgroup _atomic *)&mem, data, memory_order_relaxed);
-}
+#define ATOMIC_OP(glsl_op, mtl_op) \
+  ATOMIC_OP_EX(threadgroup, glsl_op, mtl_op) \
+  ATOMIC_OP_EX(device, glsl_op, mtl_op)
 
-/* Device memory. */
-template<typename T> T atomicMax(device T &mem, T data)
-{
-  return atomic_fetch_max_explicit((device _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicMin(device T &mem, T data)
-{
-  return atomic_fetch_min_explicit((device _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicAdd(device T &mem, T data)
-{
-  return atomic_fetch_add_explicit((device _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicSub(device T &mem, T data)
-{
-  return atomic_fetch_sub_explicit((device _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicAnd(device T &mem, T data)
-{
-  return atomic_fetch_and_explicit((device _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicOr(device T &mem, T data)
-{
-  return atomic_fetch_or_explicit((device _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicXor(device T &mem, T data)
-{
-  return atomic_fetch_xor_explicit((device _atomic *)&mem, data, memory_order_relaxed);
-}
-template<typename T> T atomicExchange(device T &mem, T data)
-{
-  return atomic_exchange_explicit((device _atomic *)&mem, data, memory_order_relaxed);
-}
+ATOMIC_OP(Max, fetch_max)
+ATOMIC_OP(Min, fetch_min)
+ATOMIC_OP(Add, fetch_add)
+ATOMIC_OP(Sub, fetch_sub)
+ATOMIC_OP(And, fetch_and)
+ATOMIC_OP(Or, fetch_or)
+ATOMIC_OP(Xor, fetch_xor)
+ATOMIC_OP(Exchange, exchange)
 
-/* Used to replace 'out' in function parameters with thread-local reference
- * shortened to avoid expanding the GLSL source string. */
-#define THD thread
-#define OUT(type, name, array) thread type(&name)[array]
+#undef ATOMIC_OP
 
 /* Generate wrapper structs for combined texture and sampler type. */
 #ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
-# define SAMPLER_DECLARATION constant sampler *samp;
+using sampler_ptr = constant sampler *;
 #else
-# define SAMPLER_DECLARATION thread sampler *samp;
+using sampler_ptr = thread sampler *;
 #endif
 
-#define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \
-  template struct STRUCT_NAME { \
-    thread TEX_TYPE *texture; \
-    SAMPLER_DECLARATION \
+/* Use point sampler instead of texture read to benefit from texture caching and reduce branching
+ * through removal of bounds tests, as these are handled by the sample operation.
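+ * e.g. a GLSL-style `texelFetch(tex, coord)` is serviced by the `fetch()` overloads below as
+ * `texture->sample(_mtl_fetch_samp, FltCoord(coord))` (illustrative mapping).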
*/ +constexpr sampler _mtl_fetch_samp(address::clamp_to_zero, filter::nearest, coord::pixel); + +template +struct _mtl_sampler { + + template + using vec_or_scalar = typename metal::conditional>::type; + + using FltCoord = vec_or_scalar; + using FltDeriv = vec_or_scalar; + using IntCoord = vec_or_scalar; + using IntDeriv = vec_or_scalar; + using UintCoord = vec_or_scalar; + using UintDeriv = vec_or_scalar; + using SizeVec = vec_or_scalar; + using DataVec = vec; + using AtomicT = T; + + /* Template compatible gradient type choosing. */ + template struct gradient_n {}; + /* clang-format off */ + template<> struct gradient_n<2, 0> { using type = gradient2d; }; + template<> struct gradient_n<3, 0> { using type = gradient3d; }; + template<> struct gradient_n<2, 1> { using type = gradientcube; }; + /* clang-format on */ + /* Using `using` would invalidate the whole class. */ +#define gradient typename gradient_n::type + +#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS + /* If native texture atomics are unsupported, we instead utilize a custom type which wraps a + * buffer-backed texture. This texture will always be a Texture2D, but will emulate access to + * Texture3D and Texture2DArray by stacking layers. + * Access pattern will be derived based on the source type. 2DArray and 3D atomic texture + * support will require information on the size of each layer within the source 2D texture. + * + * A device pointer to the backing buffer will also be available for the atomic operations. + * NOTE: For atomic ops, it will only be valid to use access::read_write. + * We still need to use the wrapped type for access:sample, as texture2DArray and texture3D + * will require access indirection. + * + * NOTE: Only type of UINT is valid, but full template provided to match syntax of standard + * textures. */ + template struct AtomicEmulation {}; + + template struct AtomicEmulation { + /** Buffer to do atomic operations on. */ + device U *buffer; + /* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater + * than the texture's native width to satisfy device alignment rules. We need to use the padded + * width when writing to ensure the correct writing location aligns with a given pixel location + * in the texture. */ + ushort aligned_width; + + int2 to_internal_coord(IntCoord coord) const + { + return coord.xy; + } + uint to_linear_coord(IntCoord coord) const + { + return coord.x + coord.y * aligned_width; + } + }; + + template struct AtomicEmulation { + /** Buffer to do atomic operations on. */ + device U *buffer; + /** Required to pixel location inside the backing texture 2D space, and texture size query. */ + ushort3 texture_size; + /* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater + * than the texture's native width to satisfy device alignment rules. We need to use the padded + * width when writing to ensure the correct writing location aligns with a given pixel location + * in the texture. 
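+     * e.g. for a 100 texel wide texture padded to `aligned_width = 128` (hypothetical sizes),
+     * texel (x, y) maps to buffer element `x + y * 128`, as computed by `to_linear_coord()`.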
*/
+    ushort aligned_width;
+
+    int2 to_internal_coord(IntCoord coord) const
+    {
+      return int2(coord.x, coord.y + texture_size.y * coord.z);
+    }
+    uint to_linear_coord(IntCoord coord) const
+    {
+      uint row = coord.y + coord.z * texture_size.y;
+      return coord.x + row * aligned_width;
+    }
+  };
+
+  AtomicEmulation<T, Dim + Ar> atomic;
+#endif
+
+  thread TextureT *texture;
+  sampler_ptr samp;
+
+  template<int D, int A, bool Atm = Atomic> SizeVec size_impl(uint lod) const {}
+  template<> SizeVec size_impl<1, 0>(uint lod) const
+  {
+    return SizeVec(texture->get_width());
+  }
+  template<> SizeVec size_impl<1, 1>(uint lod) const
+  {
+    return SizeVec(texture->get_width(), texture->get_array_size());
+  }
+  template<> SizeVec size_impl<2, 0>(uint lod) const
+  {
+    return SizeVec(texture->get_width(lod), texture->get_height(lod));
+  }
+  template<> SizeVec size_impl<2, 1>(uint lod) const
+  {
+    return SizeVec(texture->get_width(lod), texture->get_height(lod), texture->get_array_size());
+  }
+  template<> SizeVec size_impl<3, 0>(uint lod) const
+  {
+    return SizeVec(texture->get_width(lod), texture->get_height(lod), texture->get_depth(lod));
+  }
+#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
+  template<> SizeVec size_impl<2, 1, true>(uint lod) const
+  {
+    return SizeVec(atomic.texture_size);
+  }
+  template<> SizeVec size_impl<3, 0, true>(uint lod) const
+  {
+    return SizeVec(atomic.texture_size);
+  }
+#endif
+  SizeVec size(int lod = 0) const
+  {
+    return size_impl<Dim, Ar>(uint(lod));
+  }

-/* If native texture atomics are unsupported, we instead utilize a custom type which wraps a
- * buffer-backed texture. This texture will always be a Texture2D, but will emulate access to
- * Texture3D and Texture2DArray by stacking layers.
- * Access pattern will be derived based on the source type. 2DArray and 3D atomic texture
- * support will require information on the size of each layer within the source 2D texture.
- *
- * A device pointer to the backing buffer will also be available for the atomic operations.
- * NOTE: For atomic ops, it will only be valid to use access::read_write.
- * We still need to use the wrapped type for access:sample, as texture2DArray and texture3D
- * will require access indirection.
- *
- * NOTE: Only type of UINT is valid, but full template provided to match syntax of standard
- * textures.
*/ +#define ARRAY_FN \ + template +#define NON_ARRAY_FN \ + template + + NON_ARRAY_FN DataVec sample(FltCoord coord) const + { + return texture->sample(*samp, coord); + } + NON_ARRAY_FN DataVec sample_grad(FltCoord coord, FltDeriv dPdx, FltDeriv dPdy) const + { + return texture->sample(*samp, coord, gradient(dPdx, dPdy)); + } + NON_ARRAY_FN DataVec sample_bias(FltCoord coord, bias lod_bias) const + { + return texture->sample(*samp, coord, lod_bias); + } + NON_ARRAY_FN DataVec sample_lod(FltCoord coord, level lod, IntDeriv offset = {0}) const + { + return texture->sample(*samp, coord, lod, offset); + } + NON_ARRAY_FN DataVec gather(FltCoord coord) const + { + return texture->gather(*samp, coord); + } + NON_ARRAY_FN DataVec fetch(IntCoord coord) const + { + return texture->sample(_mtl_fetch_samp, FltCoord(coord)); + } + NON_ARRAY_FN DataVec fetch(IntCoord coord, level lod, IntDeriv offset = {0}) const + { + return texture->sample(_mtl_fetch_samp, FltCoord(coord), lod, offset); + } + + ARRAY_FN DataVec sample(FltCoord coord) const + { + return texture->sample(*samp, uv_mask(coord), layer_mask(coord)); + } + ARRAY_FN DataVec sample_grad(FltCoord coord, FltDeriv dPdx, FltDeriv dPdy) const + { + return texture->sample(*samp, uv_mask(coord), layer_mask(coord), gradient(dPdx, dPdy)); + } + ARRAY_FN DataVec sample_bias(FltCoord coord, bias lod_bias) const + { + return texture->sample(*samp, uv_mask(coord), layer_mask(coord), lod_bias); + } + ARRAY_FN DataVec sample_lod(FltCoord coord, level lod, IntDeriv offset = {0}) const + { + return texture->sample(*samp, uv_mask(coord), layer_mask(coord), lod, offset); + } + ARRAY_FN DataVec gather(FltCoord coord) const + { + return texture->gather(*samp, uv_mask(coord), layer_mask(coord)); + } + ARRAY_FN DataVec fetch(IntCoord coord) const + { + return texture->sample(_mtl_fetch_samp, uv_mask(coord), layer_mask(coord)); + } + ARRAY_FN DataVec fetch(IntCoord coord, level lod, IntDeriv ofs = {0}) const + { + return texture->sample(_mtl_fetch_samp, uv_mask(coord), layer_mask(coord), lod, ofs); + } + +#undef gradient +#undef ARRAY_FN +#undef NON_ARRAY_FN + + /** + * Image functions. + * To be split to its own class. + */ + +#define ARRAY_FN \ + template +#define NON_ARRAY_FN \ + template + + NON_ARRAY_FN DataVec load(IntCoord coord) const + { + return texture->read(UintCoord(coord), 0); + } + NON_ARRAY_FN void store(DataVec data, IntCoord coord) const + { + texture->write(data, UintCoord(coord), 0); + } + + ARRAY_FN DataVec load(IntCoord coord) const + { + return texture->read(uv_mask_img(coord), layer_mask_img(coord), 0); + } + ARRAY_FN void store(DataVec data, IntCoord coord) const + { + texture->write(data, uv_mask_img(coord), layer_mask_img(coord), 0); + } + +#undef ARRAY_FN +#undef NON_ARRAY_FN + #ifndef MTL_SUPPORTS_TEXTURE_ATOMICS + /* Atomic samplers only support `textureFetch` as the texture layout doesn't allow filtering. */ -template -struct _mtl_combined_image_sampler_2d_atomic_fallback { - thread texture2d *texture; - SAMPLER_DECLARATION - device T *buffer; - /* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater than - * the texture's native width to satisfy device alignment rules. We need to use the padded width - * when writing to ensure the - * correct writing location aligns with a given pixel location in the texture. 
*/ - uint aligned_width; -}; +# define ATOMIC_FN template -template -struct _mtl_combined_image_sampler_2d_array_atomic_fallback { - thread texture2d *texture; - SAMPLER_DECLARATION - device T *buffer; - /* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater than - * the texture's native width to satisfy device alignment rules. We need to use the padded width - * when writing to ensure the - * correct writing location aligns with a given pixel location in the texture. */ - uint aligned_width; - /* Texture size required to determine location offset of array layer with 2D texture space. */ - ushort3 texture_size; -}; + ATOMIC_FN DataVec fetch(IntCoord coord) const + { + int2 coord_2d = atomic.to_internal_coord(coord); + return texture->sample(_mtl_fetch_samp, float2(coord_2d)); + } + ATOMIC_FN DataVec fetch(IntCoord coord, level lod, IntDeriv offset = {0}) const + { + int2 coord_2d = atomic.to_internal_coord(coord); + return texture->sample(_mtl_fetch_samp, float2(coord_2d), lod, offset); + } -template -struct _mtl_combined_image_sampler_3d_atomic_fallback { - thread texture2d *texture; - SAMPLER_DECLARATION - device T *buffer; - /* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater than - * the texture's native width to satisfy device alignment rules. We need to use the padded width - * when writing to ensure the - * correct writing location aligns with a given pixel location in the texture. */ - uint aligned_width; - /* Texture size required to determine location offset of array layer with 2D texture space. */ - ushort3 texture_size; -}; + ATOMIC_FN DataVec load(IntCoord coord) const + { + int2 coord_2d = atomic.to_internal_coord(coord); + return texture->read(uint2(coord_2d), 0); + } + ATOMIC_FN void store(DataVec data, IntCoord coord) const + { + int2 coord_2d = atomic.to_internal_coord(coord); + texture->write(data, uint2(coord_2d), 0); + } + +# undef ATOMIC_FN + + AtomicT atomic_min(IntCoord coord, AtomicT data) const + { + return atomicMin(atomic.buffer[atomic.to_linear_coord(coord)], data); + } + AtomicT atomic_max(IntCoord coord, AtomicT data) const + { + return atomicMax(atomic.buffer[atomic.to_linear_coord(coord)], data); + } + AtomicT atomic_add(IntCoord coord, AtomicT data) const + { + return atomicAdd(atomic.buffer[atomic.to_linear_coord(coord)], data); + } + AtomicT atomic_and(IntCoord coord, AtomicT data) const + { + return atomicAnd(atomic.buffer[atomic.to_linear_coord(coord)], data); + } + AtomicT atomic_or(IntCoord coord, AtomicT data) const + { + return atomicOr(atomic.buffer[atomic.to_linear_coord(coord)], data); + } + AtomicT atomic_xor(IntCoord coord, AtomicT data) const + { + return atomicXor(atomic.buffer[atomic.to_linear_coord(coord)], data); + } + AtomicT atomic_exchange(IntCoord coord, AtomicT data) const + { + return atomicExchange(atomic.buffer[atomic.to_linear_coord(coord)], data); + } + +#else +# define NON_ARRAY_ATOMIC \ + template == true), \ + ENABLE_IF(sizeof(U) == 4), \ + ENABLE_IF(Ar == 0)> +# define ARRAY_ATOMIC \ + template == true), \ + ENABLE_IF(sizeof(U) == 4), \ + ENABLE_IF(Ar == 1)> + + NON_ARRAY_ATOMIC AtomicT atomic_min(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_min(UintCoord(coord), data).x; + } + NON_ARRAY_ATOMIC AtomicT atomic_max(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_max(UintCoord(coord), data).x; + } + NON_ARRAY_ATOMIC AtomicT atomic_add(IntCoord coord, AtomicT data) const + { + return 
texture->atomic_fetch_add(UintCoord(coord), data).x; + } + NON_ARRAY_ATOMIC AtomicT atomic_and(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_and(UintCoord(coord), data).x; + } + NON_ARRAY_ATOMIC AtomicT atomic_or(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_or(UintCoord(coord), data).x; + } + NON_ARRAY_ATOMIC AtomicT atomic_xor(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_xor(UintCoord(coord), data).x; + } + NON_ARRAY_ATOMIC AtomicT atomic_exchange(IntCoord coord, AtomicT data) const + { + return texture->atomic_exchange(UintCoord(coord), data).x; + } + + ARRAY_ATOMIC AtomicT atomic_min(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_min(uv_mask_img(coord), layer_mask_img(coord), data).x; + } + ARRAY_ATOMIC AtomicT atomic_max(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_max(uv_mask_img(coord), layer_mask_img(coord), data).x; + } + ARRAY_ATOMIC AtomicT atomic_add(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_add(uv_mask_img(coord), layer_mask_img(coord), data).x; + } + ARRAY_ATOMIC AtomicT atomic_and(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_and(uv_mask_img(coord), layer_mask_img(coord), data).x; + } + ARRAY_ATOMIC AtomicT atomic_or(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_or(uv_mask_img(coord), layer_mask_img(coord), data).x; + } + ARRAY_ATOMIC AtomicT atomic_xor(IntCoord coord, AtomicT data) const + { + return texture->atomic_fetch_xor(uv_mask_img(coord), layer_mask_img(coord), data).x; + } + ARRAY_ATOMIC AtomicT atomic_exchange(IntCoord coord, AtomicT data) const + { + return texture->atomic_exchange(uv_mask_img(coord), layer_mask_img(coord), data).x; + } + +# undef NON_ARRAY_ATOMIC +# undef ARRAY_ATOMIC #endif + void fence() + { + texture->fence(); + } + + private: + template static U reshape(V v) {} + /* clang-format off */ + template<> float reshape(float2 v) { return v.x; } + template<> float2 reshape(float3 v) { return v.xy; } + template<> float3 reshape(float4 v) { return v.xyz; } + template<> int reshape(int2 v) { return v.x; } + template<> int2 reshape(int3 v) { return v.xy; } + template<> int3 reshape(int4 v) { return v.xyz; } + /* clang-format on */ + + FltDeriv uv_mask(FltCoord coord) const + { + return reshape(coord); + } + FltDeriv uv_mask(IntCoord coord) const + { + return FltDeriv(reshape(coord)); + } + + uint layer_mask(FltCoord coord) const + { + return coord[Dim + Cube]; + } + uint layer_mask(IntCoord coord) const + { + return coord[Dim + Cube]; + } + + UintDeriv uv_mask_img(IntCoord coord) const + { + return UintDeriv(reshape(coord)); + } + + uint layer_mask_img(IntCoord coord) const + { + return coord[Dim + Cube]; + } +}; + +/** Sampler functions */ + +#define SAMPLER_FN \ + template + +SAMPLER_FN SizeVec textureSize(SamplerT texture, int lod) +{ + return texture.size(lod); +} + +SAMPLER_FN DataVec texture(SamplerT texture, FltCoord coord) +{ + return texture.sample(coord); +} + +SAMPLER_FN DataVec texture(SamplerT texture, FltCoord coord, float lod_bias) +{ + return texture.sample_bias(coord, bias(lod_bias)); +} + +SAMPLER_FN DataVec textureLod(SamplerT texture, FltCoord coord, float lod) +{ + return texture.sample_lod(coord, level(lod)); +} + +SAMPLER_FN DataVec textureLodOffset(SamplerT texture, FltCoord coord, float lod, IntDeriv offset) +{ + return texture.sample_lod(coord, level(lod), offset); +} + +SAMPLER_FN DataVec textureGather(SamplerT texture, 
FltCoord coord) +{ + return texture.gather(coord); +} + +SAMPLER_FN DataVec textureGrad(SamplerT texture, FltCoord coord, FltDeriv dPdx, FltDeriv dPdy) +{ + return texture.sample_grad(coord, dPdx, dPdy); +} + +SAMPLER_FN DataVec texelFetch(SamplerT texture, IntCoord coord, int lod) +{ + return texture.fetch(coord, level(lod)); +} + +SAMPLER_FN DataVec texelFetchOffset(SamplerT texture, IntCoord coord, int lod, IntDeriv offset) +{ + return texture.fetch(coord, level(lod), offset); +} + +#undef SAMPLER_FN + +/** Image functions */ + +#define IMAGE_FN \ + template + +IMAGE_FN SizeVec imageSize(SamplerT texture) +{ + return texture.size(); +} + +IMAGE_FN void imageFence(SamplerT texture) +{ + return texture.fence(); +} + +IMAGE_FN DataVec imageLoad(SamplerT texture, IntCoord coord) +{ + if (any(UintCoord(coord) >= UintCoord(texture.size()))) { + return DataVec(0); + } + return texture.load(coord); +} + +IMAGE_FN DataVec imageLoadFast(SamplerT texture, IntCoord coord) +{ + return texture.load(coord); +} + +IMAGE_FN void imageStore(SamplerT texture, IntCoord coord, DataVec data) +{ + if (any(UintCoord(coord) >= UintCoord(texture.size()))) { + return; + } + texture.store(data, coord); +} + +IMAGE_FN +void imageStoreFast(SamplerT texture, IntCoord coord, DataVec data) +{ + texture.store(data, coord); +} + +IMAGE_FN AtomicT imageAtomicMin(SamplerT texture, IntCoord coord, AtomicT data) +{ + return texture.atomic_min(coord, data); +} +IMAGE_FN AtomicT imageAtomicMax(SamplerT texture, IntCoord coord, AtomicT data) +{ + return texture.atomic_max(coord, data); +} +IMAGE_FN AtomicT imageAtomicAdd(SamplerT texture, IntCoord coord, AtomicT data) +{ + return texture.atomic_add(coord, data); +} +IMAGE_FN AtomicT imageAtomicAnd(SamplerT texture, IntCoord coord, AtomicT data) +{ + return texture.atomic_and(coord, data); +} +IMAGE_FN AtomicT imageAtomicOr(SamplerT texture, IntCoord coord, AtomicT data) +{ + return texture.atomic_or(coord, data); +} +IMAGE_FN AtomicT imageAtomicXor(SamplerT texture, IntCoord coord, AtomicT data) +{ + return texture.atomic_xor(coord, data); +} +IMAGE_FN AtomicT imageAtomicExchange(SamplerT texture, IntCoord coord, AtomicT data) +{ + return texture.atomic_exchange(coord, data); +} + +#undef IMAGE_FN + /* Add any types as needed. 
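 *
 * For example (hypothetical, not part of this patch), a cube-array shadow sampler
 * would be a single alias over the same wrapper, following the pattern below:
 *
 *   TEMPLATE using samplerCubeArrayShadow = _mtl_sampler<T, depthcube_array<T>, true, 2, 1, 1>;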
*/
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array);
-COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array);
+#define TEMPLATE template<typename T>
+TEMPLATE using depth2D = _mtl_sampler<T, depth2d<T>, true, 2, 0, 0>;
+TEMPLATE using depth2DArray = _mtl_sampler<T, depth2d_array<T>, true, 2, 0, 1>;
+TEMPLATE using depthCube = _mtl_sampler<T, depthcube_array<T>, true, 2, 1, 1>;
+TEMPLATE using depthCubeArray = _mtl_sampler<T, depthcube_array<T>, true, 2, 1, 1>;
+TEMPLATE using sampler1D = _mtl_sampler<T, texture1d<T>, false, 1, 0, 0>;
+TEMPLATE using sampler1DArray = _mtl_sampler<T, texture1d_array<T>, false, 1, 0, 1>;
+TEMPLATE using sampler2D = _mtl_sampler<T, texture2d<T>, false, 2, 0, 0>;
+TEMPLATE using sampler2DArray = _mtl_sampler<T, texture2d_array<T>, false, 2, 0, 1>;
+TEMPLATE using sampler3D = _mtl_sampler<T, texture3d<T>, false, 3, 0, 0>;
+TEMPLATE using samplerBuffer = _mtl_sampler<T, texture_buffer<T>, false, 1, 0, 0>;
+TEMPLATE using samplerCube = _mtl_sampler<T, texturecube<T>, false, 2, 1, 0>;
+TEMPLATE using samplerCubeArray = _mtl_sampler<T, texturecube_array<T>, false, 2, 1, 1>;
+/* Atomic textures are defined as 2D textures with special layout for 3D texture emulation. */
+TEMPLATE using sampler2DAtomic = _mtl_sampler<T, texture2d<T>, false, 2, 0, 0, true>;
+TEMPLATE using sampler2DArrayAtomic = _mtl_sampler<T, texture2d_array<T>, false, 2, 0, 1, true>;
+TEMPLATE using sampler3DAtomic = _mtl_sampler<T, texture3d<T>, false, 3, 0, 0, true>;
+
+/* Used by backend to declare the samplers. Could be removed. */
+TEMPLATE using _mtl_sampler_depth_2d = depth2D<T>;
+TEMPLATE using _mtl_sampler_depth_2d_array = depth2DArray<T>;
+TEMPLATE using _mtl_sampler_depth_cube = depthCube<T>;
+TEMPLATE using _mtl_sampler_depth_cube_array = depthCubeArray<T>;
+TEMPLATE using _mtl_sampler_1d = sampler1D<T>;
+TEMPLATE using _mtl_sampler_1d_array = sampler1DArray<T>;
+TEMPLATE using _mtl_sampler_2d = sampler2D<T>;
+TEMPLATE using _mtl_sampler_2d_array = sampler2DArray<T>;
+TEMPLATE using _mtl_sampler_3d = sampler3D<T>;
+TEMPLATE using _mtl_sampler_buffer = samplerBuffer<T>;
+TEMPLATE using _mtl_sampler_cube = samplerCube<T>;
+TEMPLATE using _mtl_sampler_cube_array = samplerCubeArray<T>;
+TEMPLATE using _mtl_sampler_2d_atomic = sampler2DAtomic<T>;
+TEMPLATE using _mtl_sampler_2d_array_atomic = sampler2DArrayAtomic<T>;
+TEMPLATE using _mtl_sampler_3d_atomic = sampler3DAtomic<T>;
+#undef TEMPLATE
+
+/* Variant for 1D samplers. Discard the lod. */
+template<typename T>
+typename sampler1D<T>::DataVec texelFetch(sampler1D<T> texture, int coord, int lod = 0)
+{
+  return texture.fetch(coord);
+}
+
+/* Variant for 1DArray samplers. Discard the lod. */
+template<typename T>
+typename sampler1DArray<T>::DataVec texelFetch(sampler1DArray<T> texture,
+                                               int2 coord,
+                                               int lod = 0)
+{
+  return texture.fetch(coord);
+}
+
+/* Variant for buffer samplers. Discard the lod.
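+ *
+ * Usage sketch (identifiers hypothetical): the lod parameter exists only so GLSL
+ * call sites keep compiling; it is ignored, and out-of-range texels return zero.
+ *
+ *   float4 texel = texelFetch(buf, index, 0);  // Same as texelFetch(buf, index).
+ *   float4 zero = texelFetch(buf, -1, 0);      // Out of range -> DataVec(0).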
*/
+template<typename T>
+typename samplerBuffer<T>::DataVec texelFetch(samplerBuffer<T> texture,
+                                              int coord,
+                                              int lod = 0)
+{
+  uint texel = uint(coord);
+  if (texel < texture.texture->get_width()) {
+    return texture.texture->read(texel);
+  }
+  return typename samplerBuffer<T>::DataVec(0);
+}

 /* Sampler struct for argument buffer. */
 #ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
@@ -283,51 +742,51 @@ struct SStruct {
 #endif

 /* Samplers as function parameters. */
-#define sampler1D thread _mtl_combined_image_sampler_1d<float>
-#define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float>
-#define sampler2D thread _mtl_combined_image_sampler_2d<float>
-#define depth2D thread _mtl_combined_image_sampler_depth_2d<float>
-#define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float>
-#define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
-#define depth2DArray thread _mtl_combined_image_sampler_depth_2d_array<float>
-#define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
-#define sampler3D thread _mtl_combined_image_sampler_3d<float>
-#define samplerBuffer thread _mtl_combined_image_sampler_buffer<float>
-#define samplerCube thread _mtl_combined_image_sampler_cube<float>
-#define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float>
+#define sampler1D thread _mtl_sampler_1d<float>
+#define sampler1DArray thread _mtl_sampler_1d_array<float>
+#define sampler2D thread _mtl_sampler_2d<float>
+#define depth2D thread _mtl_sampler_depth_2d<float>
+#define sampler2DArray thread _mtl_sampler_2d_array<float>
+#define sampler2DArrayShadow thread _mtl_sampler_depth_2d_array<float>
+#define depth2DArray thread _mtl_sampler_depth_2d_array<float>
+#define depth2DArrayShadow thread _mtl_sampler_depth_2d_array<float>
+#define sampler3D thread _mtl_sampler_3d<float>
+#define samplerBuffer thread _mtl_sampler_buffer<float>
+#define samplerCube thread _mtl_sampler_cube<float>
+#define samplerCubeArray thread _mtl_sampler_cube_array<float>

-#define usampler1D thread _mtl_combined_image_sampler_1d<uint>
-#define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint>
-#define usampler2D thread _mtl_combined_image_sampler_2d<uint>
-#define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint>
-#define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint>
-#define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
-#define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
-#define usampler3D thread _mtl_combined_image_sampler_3d<uint>
-#define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint>
-#define usamplerCube thread _mtl_combined_image_sampler_cube<uint>
-#define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint>
+#define usampler1D thread _mtl_sampler_1d<uint>
+#define usampler1DArray thread _mtl_sampler_1d_array<uint>
+#define usampler2D thread _mtl_sampler_2d<uint>
+#define udepth2D thread _mtl_sampler_depth_2d<uint>
+#define usampler2DArray thread _mtl_sampler_2d_array<uint>
+#define usampler2DArrayShadow thread _mtl_sampler_depth_2d_array<uint>
+#define udepth2DArrayShadow thread _mtl_sampler_depth_2d_array<uint>
+#define usampler3D thread _mtl_sampler_3d<uint>
+#define usamplerBuffer thread _mtl_sampler_buffer<uint>
+#define usamplerCube thread _mtl_sampler_cube<uint>
+#define usamplerCubeArray thread _mtl_sampler_cube_array<uint>

-#define isampler1D thread _mtl_combined_image_sampler_1d<int>
-#define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int>
-#define isampler2D thread _mtl_combined_image_sampler_2d<int>
-#define idepth2D thread _mtl_combined_image_sampler_depth_2d<int>
-#define isampler2DArray thread _mtl_combined_image_sampler_2d_array<int>
-#define isampler2DArrayShadow thread
_mtl_combined_image_sampler_depth_2d_array<int>
-#define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
-#define isampler3D thread _mtl_combined_image_sampler_3d<int>
-#define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int>
-#define isamplerCube thread _mtl_combined_image_sampler_cube<int>
-#define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int>
+#define isampler1D thread _mtl_sampler_1d<int>
+#define isampler1DArray thread _mtl_sampler_1d_array<int>
+#define isampler2D thread _mtl_sampler_2d<int>
+#define idepth2D thread _mtl_sampler_depth_2d<int>
+#define isampler2DArray thread _mtl_sampler_2d_array<int>
+#define isampler2DArrayShadow thread _mtl_sampler_depth_2d_array<int>
+#define idepth2DArrayShadow thread _mtl_sampler_depth_2d_array<int>
+#define isampler3D thread _mtl_sampler_3d<int>
+#define isamplerBuffer thread _mtl_sampler_buffer<int>
+#define isamplerCube thread _mtl_sampler_cube<int>
+#define isamplerCubeArray thread _mtl_sampler_cube_array<int>

 #ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
 /* If texture atomics are unsupported, map atomic types to internal atomic fallback type. */
-# define usampler2DArrayAtomic _mtl_combined_image_sampler_2d_array_atomic_fallback<uint>
-# define usampler2DAtomic _mtl_combined_image_sampler_2d_atomic_fallback<uint>
-# define usampler3DAtomic _mtl_combined_image_sampler_3d_atomic_fallback<uint>
-# define isampler2DArrayAtomic _mtl_combined_image_sampler_2d_array_atomic_fallback<int>
-# define isampler2DAtomic _mtl_combined_image_sampler_2d_atomic_fallback<int>
-# define isampler3DAtomic _mtl_combined_image_sampler_3d_atomic_fallback<int>
+# define usampler2DArrayAtomic _mtl_sampler_2d_array_atomic<uint>
+# define usampler2DAtomic _mtl_sampler_2d_atomic<uint>
+# define usampler3DAtomic _mtl_sampler_3d_atomic<uint>
+# define isampler2DArrayAtomic _mtl_sampler_2d_array_atomic<int>
+# define isampler2DAtomic _mtl_sampler_2d_atomic<int>
+# define isampler3DAtomic _mtl_sampler_3d_atomic<int>
 #else
 # define usampler2DArrayAtomic usampler2DArray
 # define usampler2DAtomic usampler2D
@@ -340,1544 +799,32 @@ struct SStruct {

 /* Vector accessor aliases. */
 #define st xy

-/* Texture functions. */
-#define texelFetch _texelFetch_internal
-#define texelFetchOffset(__tex, __texel, __lod, __offset) \
-  _texelFetch_internal(__tex, __texel, __lod, __offset)
-#define imageLoad(__image, __coord) _texelFetch_internal(__image, __coord, 0)
-#define imageLoadFast(__image, __coord) _texelFetch_internal_fast(__image, __coord, 0)
-#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv)
-#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias)))
-#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod)))
-#define textureLodOffset(__tex, __uv, __lod, __offset) \
-  _texture_internal_level(__tex, __uv, level(float(__lod)), __offset)
-#define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0)
-#define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp)
-#define textureGatherOffset(__tex, __offset, __uv, __comp) \
-  _texture_gather_internal(__tex, __uv, __comp, __offset)
-#define textureGrad(__tex, __uv, __dpdx, __dpdy) \
-  _texture_grad_internal(__tex, __uv, __dpdx, __dpdy)
-
-#define TEXURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME
-#define texture(...) TEXURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__)
-#define textureGather(...) TEXURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__)
-
-/* Texture-write functions.
*/ -#define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value) -#define imageStoreFast(_tex, _coord, _value) _texture_write_internal_fast(_tex, _coord, _value) - -/* Texture synchronization functions. */ -#define imageFence(image) image.texture->fence() - -/* Singular return values from texture functions of type DEPTH are often indexed with either .r or - * .x. This is a lightweight wrapper type for handling this syntax. */ -union _msl_return_float { - float r; - float x; - inline operator float() const - { - return r; - } -}; - -/* Add custom texture sampling/reading routines for each type to account for special return cases, - * e.g. returning a float with an r parameter Note: Cannot use template specialization for input - * type, as return types are specific to the signature of 'tex'. */ - -/* Use point sampler instead of texture read to benefit from texture caching and reduce branching - * through removal of bounds tests, as these are handled by the sample operation. */ -constexpr sampler _point_sample_(address::clamp_to_zero, filter::nearest, coord::pixel); - -/* Texture Read via point sampling. - * NOTE: These templates will evaluate first for texture resources bound with sample. */ -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d tex, - T texel, - uint lod = 0) -{ - return tex.texture->sample(_point_sample_, float(texel)); -} - -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d tex, - T texel, - uint lod, - T offset) -{ - return tex.texture->sample(_point_sample_, float(texel + offset)); -} - -template -inline vec _texelFetch_internal( - thread _mtl_combined_image_sampler_1d_array tex, - vec texel, - uint lod, - vec offset = vec(0, 0)) -{ - return tex.texture->sample(_point_sample_, float(texel.x + offset.x), uint(texel.y + offset.y)); -} - -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_2d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod)); -} - -template -inline vec _texelFetch_internal( - thread _mtl_combined_image_sampler_2d_array tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->sample( - _point_sample_, float2(texel.xy + offset.xy), uint(texel.z + offset.z), level(lod)); -} - -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_3d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->sample(_point_sample_, float3(texel.xyz + offset.xyz), level(lod)); -} - -template -inline _msl_return_float _texelFetch_internal( - thread _mtl_combined_image_sampler_depth_2d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - _msl_return_float fl = { - tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod))}; - return fl; -} - -template -inline vec _texture_internal_samp( - thread _mtl_combined_image_sampler_2d_array tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->sample( - _point_sample_, float2(texel.xy + offset.xy), uint(texel.z + offset.z), level(lod)); -} - -/* Texture Read via read operation. Required by compute/image-bindings. 
*/ -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d tex, - T texel, - uint lod = 0) -{ - float w = tex.texture->get_width(); - if (texel >= 0 && texel < w) { - return tex.texture->read(uint(texel)); - } - else { - return vec(0); - } -} - -template -inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d tex, - T texel, - uint lod = 0) -{ - return tex.texture->read(uint(texel)); -} - -template -inline vec _texelFetch_internal( - const thread _mtl_combined_image_sampler_buffer tex, T texel, uint lod = 0) -{ - float w = tex.texture->get_width(); - if (texel >= 0 && texel < w) { - return tex.texture->read(uint(texel)); - } - else { - return vec(0); - } -} - -template -inline vec _texelFetch_internal_fast( - const thread _mtl_combined_image_sampler_buffer tex, T texel, uint lod = 0) -{ - return tex.texture->read(uint(texel)); -} - -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d tex, - T texel, - uint lod, - T offset) -{ - float w = tex.texture->get_width(); - if ((texel + offset) >= 0 && (texel + offset) < w) { - /* LODs not supported for 1d textures. This must be zero. */ - return tex.texture->read(uint(texel + offset), 0); - } - else { - return vec(0); - } -} - -template -inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d tex, - T texel, - uint lod, - T offset) -{ - /* LODs not supported for 1d textures. This must be zero. */ - return tex.texture->read(uint(texel + offset), 0); -} - -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array tex, - vec texel, - uint lod, - vec offset = vec(0, 0)) -{ - - float w = tex.texture->get_width(); - float h = tex.texture->get_array_size(); - if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && - (texel.y + offset.y) < h) - { - /* LODs not supported for 1d textures. This must be zero. */ - return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0); - } - else { - return vec(0); - } -} - -template -inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d_array tex, - vec texel, - uint lod, - vec offset = vec(0, 0)) -{ - /* LODs not supported for 1d textures. This must be zero. 
*/ - return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0); -} - -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_2d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - - float w = tex.texture->get_width() >> lod; - float h = tex.texture->get_height() >> lod; - if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && - (texel.y + offset.y) < h) - { - return tex.texture->read(uint2(texel + offset), lod); - } - else { - return vec(0); - } -} - -template -inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->read(uint2(texel + offset), lod); -} - -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - float w = tex.texture->get_width() >> lod; - float h = tex.texture->get_height() >> lod; - float d = tex.texture->get_array_size(); - if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && - (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) - { - return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); - } - else { - return vec(0); - } -} - -template -inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d_array tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); -} - -template -inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_3d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - - float w = tex.texture->get_width() >> lod; - float h = tex.texture->get_height() >> lod; - float d = tex.texture->get_depth() >> lod; - if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && - (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d) - { - return tex.texture->read(uint3(texel + offset), lod); - } - else { - return vec(0); - } -} - -template -inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_3d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->read(uint3(texel + offset), lod); -} - -template -inline _msl_return_float _texelFetch_internal( - thread _mtl_combined_image_sampler_depth_2d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - - float w = tex.texture->get_width() >> lod; - float h = tex.texture->get_height() >> lod; - if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && - (texel.y + offset.y) < h) - { - _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)}; - return fl; - } - else { - _msl_return_float fl = {0}; - return fl; - } -} - -template -inline _msl_return_float _texelFetch_internal_fast( - thread _mtl_combined_image_sampler_depth_2d tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)}; - return fl; -} - -template -inline vec _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - - float w = tex.texture->get_width() >> lod; - float h = tex.texture->get_height() >> lod; - float d = tex.texture->get_array_size(); - if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && - (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + 
offset.z) < d) - { - return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); - } - else { - return vec(0); - } -} - -/* Sample. */ -template -inline vec _texture_internal_samp( - thread _mtl_combined_image_sampler_1d tex, float u) -{ - return tex.texture->sample(*tex.samp, u); -} - -inline float4 _texture_internal_samp( - thread _mtl_combined_image_sampler_1d_array tex, float2 ua) -{ - return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); -} - -inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d tex, - float2 uv) -{ - return tex.texture->sample(*tex.samp, uv); -} - -inline uint4 _texture_internal_samp( - thread _mtl_combined_image_sampler_2d tex, float2 uv) -{ - return tex.texture->sample(*tex.samp, uv); -} - -inline float4 _texture_internal_samp( - thread _mtl_combined_image_sampler_2d tex, float2 uv) -{ - return tex.texture->sample(*tex.samp, uv); -} - -inline _msl_return_float _texture_internal_samp( - thread _mtl_combined_image_sampler_depth_2d tex, float2 uv) -{ - _msl_return_float fl = {tex.texture->sample(*tex.samp, uv)}; - return fl; -} - -template -inline vec _texture_internal_samp( - thread _mtl_combined_image_sampler_3d tex, float3 uvw) -{ - return tex.texture->sample(*tex.samp, uvw); -} - -template -inline vec _texture_internal_samp( - thread _mtl_combined_image_sampler_2d_array tex, float3 uva) -{ - return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z)); -} - -inline _msl_return_float _texture_internal_samp( - thread _mtl_combined_image_sampler_depth_2d_array tex, float3 uva) -{ - _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))}; - return fl; -} - -inline _msl_return_float _texture_internal_samp( - thread _mtl_combined_image_sampler_depth_2d_array tex, float4 uvac) -{ - _msl_return_float fl = { - tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0))}; - return fl; -} - -template -inline vec _texture_internal_samp( - thread _mtl_combined_image_sampler_cube tex, float3 uvs) -{ - return tex.texture->sample(*tex.samp, uvs.xyz); -} - -template -inline vec _texture_internal_samp( - thread _mtl_combined_image_sampler_cube_array tex, float4 coord_a) -{ - return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w)); -} - -/* Sample Level. */ -template -inline vec _texture_internal_level( - thread _mtl_combined_image_sampler_1d tex, - float u, - level options, - int offset = 0) -{ - /* LODs not supported for 1d textures. This must be zero. */ - return tex.texture->sample(*tex.samp, u); -} - -inline float4 _texture_internal_level( - thread _mtl_combined_image_sampler_1d_array tex, - float2 ua, - level options, - int offset = 0) -{ - /* LODs not supported for 1d textures. This must be zero. 
*/ - return tex.texture->sample(*tex.samp, ua.x, uint(ua.y)); -} - -inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d tex, - float2 uv, - level options, - int2 offset = int2(0)) -{ - return tex.texture->sample(*tex.samp, uv, options, offset); -} - -inline uint4 _texture_internal_level( - thread _mtl_combined_image_sampler_2d tex, - float2 uv, - level options, - int2 offset = int2(0)) -{ - return tex.texture->sample(*tex.samp, uv, options, offset); -} - -inline float4 _texture_internal_level( - thread _mtl_combined_image_sampler_2d tex, - float2 uv, - level options, - int2 offset = int2(0)) -{ - return tex.texture->sample(*tex.samp, uv, options, offset); -} - -inline _msl_return_float _texture_internal_level( - thread _mtl_combined_image_sampler_depth_2d tex, - float2 uv, - level options, - int2 offset = int2(0)) -{ - _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; - return fl; -} - -template -inline vec _texture_internal_level( - thread _mtl_combined_image_sampler_3d tex, - float3 uvw, - level options = level(0), - int3 offset = int3(0)) -{ - return tex.texture->sample(*tex.samp, uvw, options, offset); -} - -template -inline vec _texture_internal_level( - thread _mtl_combined_image_sampler_2d_array tex, - float3 uva, - level options = level(0), - int2 offset = int2(0)) -{ - return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset); -} - -inline _msl_return_float _texture_internal_level( - thread _mtl_combined_image_sampler_depth_2d_array tex, - float3 uva, - level options = level(0), - int2 offset = int2(0)) -{ - _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)}; - return fl; -} - -inline _msl_return_float _texture_internal_level( - thread _mtl_combined_image_sampler_depth_2d_array tex, - float4 uvac, - level options = level(0), - int2 offset = int2(0)) -{ - _msl_return_float fl = { - tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)}; - return fl; -} - -template -inline vec _texture_internal_level( - thread _mtl_combined_image_sampler_cube tex, - float3 uvs, - level options = level(0), - int2 offset = int2(0)) -{ - return tex.texture->sample(*tex.samp, uvs.xyz, options); -} - -template -inline vec _texture_internal_level( - thread _mtl_combined_image_sampler_cube_array tex, - float4 coord_a, - level options = level(0), - int3 offset = int3(0)) -{ - return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options); -} - -/* Sample Bias. */ -template -inline vec _texture_internal_bias( - thread _mtl_combined_image_sampler_1d tex, - float u, - bias options = bias(0.0), - int offset = 0) -{ - return tex.texture->sample(*tex.samp, u); -} - -inline float4 _texture_internal_bias( - thread _mtl_combined_image_sampler_2d tex, - float2 uv, - bias options = bias(0.0), - int2 offset = int2(0)) -{ - return tex.texture->sample(*tex.samp, uv, options, offset); -} - -inline _msl_return_float _texture_internal_bias( - thread _mtl_combined_image_sampler_depth_2d tex, - float2 uv, - bias options = bias(0), - int2 offset = int2(0)) -{ - _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)}; - return fl; -} - -/* Texture Gather. 
*/ -component int_to_component(const int comp) -{ - switch (comp) { - default: - case 0: - return component::x; - case 1: - return component::y; - case 2: - return component::z; - case 3: - return component::w; - } - return component::x; -} - -inline float4 _texture_gather_internal( - thread _mtl_combined_image_sampler_depth_2d tex, - float2 uv, - const int comp = 0, - int2 offset = int2(0)) -{ - return tex.texture->gather(*tex.samp, uv, offset); -} - -inline float4 _texture_gather_internal( - thread _mtl_combined_image_sampler_depth_2d_array tex, - float3 uva, - const int comp = 0, - int2 offset = int2(0)) -{ - return tex.texture->gather(*tex.samp, uva.xy, uint(uva.z), offset); -} - -template -inline vec _texture_gather_internal( - thread _mtl_combined_image_sampler_2d tex, - float2 uv, - const int comp = 0, - int2 offset = int2(0)) -{ - return tex.texture->gather(*tex.samp, uv, offset); -} - -template -inline vec _texture_gather_internal( - thread _mtl_combined_image_sampler_2d_array tex, - float3 uva, - const int comp = 0, - int2 offset = int2(0)) -{ - return tex.texture->gather(*tex.samp, uva.xy, uint(uva.z), offset); -} - -/* Texture Grad. */ -inline float4 _texture_grad_internal( - thread _mtl_combined_image_sampler_2d tex, - float2 uv, - float2 dpdx, - float2 dpdy) -{ - return tex.texture->sample(*tex.samp, uv, gradient2d(dpdx, dpdy)); -} - -inline float4 _texture_grad_internal( - thread _mtl_combined_image_sampler_2d_array tex, - float3 uva, - float2 dpdx, - float2 dpdy) -{ - return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), gradient2d(dpdx, dpdy)); -} - -inline float4 _texture_grad_internal( - thread _mtl_combined_image_sampler_3d tex, - float3 uvw, - float3 dpdx, - float3 dpdy) -{ - return tex.texture->sample(*tex.samp, uvw, gradient3d(dpdx, dpdy)); -} - -/* Texture write support. 
*/ -template -inline void _texture_write_internal(thread _mtl_combined_image_sampler_1d tex, - T _coord, - vec value) -{ - float w = tex.texture->get_width(); - if (_coord >= 0 && _coord < w) { - tex.texture->write(value, uint(_coord)); - } -} - -template -inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d tex, - T _coord, - vec value) -{ - tex.texture->write(value, uint(_coord)); -} - -template -inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d tex, - T _coord, - S value) -{ - tex.texture->write(value, uint(_coord)); -} - -template -inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d tex, - T _coord, - vec value) -{ - float w = tex.texture->get_width(); - float h = tex.texture->get_height(); - if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) { - tex.texture->write(value, uint2(_coord.xy)); - } -} - -template -inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d tex, - T _coord, - vec value) -{ - tex.texture->write(value, uint2(_coord.xy)); -} - -template -inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d tex, - T _coord, - S value) -{ - tex.texture->write(value, uint2(_coord.xy)); -} - -template -inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d_array tex, - T _coord, - vec value) -{ - float w = tex.texture->get_width(); - float h = tex.texture->get_height(); - float d = tex.texture->get_array_size(); - if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 && - _coord.z < d) - { - tex.texture->write(value, uint2(_coord.xy), _coord.z); - } -} - -template -inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_array tex, - T _coord, - vec value) -{ - tex.texture->write(value, uint2(_coord.xy), _coord.z); -} - -template -inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_array tex, - T _coord, - S value) -{ - tex.texture->write(value, uint2(_coord.xy), _coord.z); -} - -template -inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d tex, - T _coord, - vec value) -{ - float w = tex.texture->get_width(); - float h = tex.texture->get_height(); - float d = tex.texture->get_depth(); - if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 && - _coord.z < d) - { - tex.texture->write(value, uint3(_coord.xyz)); - } -} - -template -inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d tex, - T _coord, - vec value) -{ - tex.texture->write(value, uint3(_coord.xyz)); -} - -template -inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d tex, - T _coord, - S value) -{ - tex.texture->write(value, uint3(_coord.xyz)); -} - -/* Texture atomic operations are only supported in Metal 3.1 and onward (macOS 14.0 Sonoma). */ -#ifdef MTL_SUPPORTS_TEXTURE_ATOMICS - -/* Image atomic operations. */ -# define imageAtomicMin(tex, coord, data) _texture_image_atomic_min_internal(tex, coord, data) -# define imageAtomicAdd(tex, coord, data) _texture_image_atomic_add_internal(tex, coord, data) -# define imageAtomicExchange(tex, coord, data) \ - _texture_image_atomic_exchange_internal(tex, coord, data) -# define imageAtomicXor(tex, coord, data) _texture_image_atomic_xor_internal(tex, coord, data) -# define imageAtomicOr(tex, coord, data) _texture_image_atomic_or_internal(tex, coord, data) - -/* Atomic OR. 
*/ -template -S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_1d tex, - int coord, - S data) -{ - return tex.texture->atomic_fetch_or(uint(coord), vec(data)).x; -} - -template -S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_1d_array tex, - int2 coord, - S data) -{ - return tex.texture->atomic_fetch_or(uint(coord.x), uint(coord.y), vec(data)).x; -} - -template -S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_2d tex, - int2 coord, - S data) -{ - return tex.texture->atomic_fetch_or(uint2(coord.xy), vec(data)).x; -} - -template -S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_2d_array tex, - int3 coord, - S data) -{ - return tex.texture->atomic_fetch_or(uint2(coord.xy), uint(coord.z), vec(data)).x; -} - -template -S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_3d tex, - int3 coord, - S data) -{ - return tex.texture->atomic_fetch_or(uint3(coord), vec(data)).x; -} - -/* Atomic XOR. */ -template -S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_1d tex, - int coord, - S data) -{ - return tex.texture->atomic_fetch_xor(uint(coord), vec(data)).x; -} - -template -S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_1d_array tex, - int2 coord, - S data) -{ - return tex.texture->atomic_fetch_xor(uint(coord.x), uint(coord.y), vec(data)).x; -} - -template -S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_2d tex, - int2 coord, - S data) -{ - return tex.texture->atomic_fetch_xor(uint2(coord.xy), vec(data)).x; -} - -template -S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_2d_array tex, - int3 coord, - S data) -{ - return tex.texture->atomic_fetch_xor(uint2(coord.xy), uint(coord.z), vec(data)).x; -} - -template -S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_3d tex, - int3 coord, - S data) -{ - return tex.texture->atomic_fetch_xor(uint3(coord), vec(data)).x; -} - -/* Atomic Min. */ -template -S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d tex, - int coord, - S data) -{ - return tex.texture->atomic_fetch_min(uint(coord), vec(data)).x; -} - -template -S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array tex, - int2 coord, - S data) -{ - return tex.texture->atomic_fetch_min(uint(coord.x), uint(coord.y), vec(data)).x; -} - -template -S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d tex, - int2 coord, - S data) -{ - return tex.texture->atomic_fetch_min(uint2(coord.xy), vec(data)).x; -} - -template -S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array tex, - int3 coord, - S data) -{ - return tex.texture->atomic_fetch_min(uint2(coord.xy), uint(coord.z), vec(data)).x; -} - -template -S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d tex, - int3 coord, - S data) -{ - return tex.texture->atomic_fetch_min(uint3(coord), vec(data)).x; -} - -/* Atomic Add. 
*/ -template -S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_1d tex, - int coord, - S data) -{ - return tex.texture->atomic_fetch_add(uint(coord), vec(data)).x; -} - -template -S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_1d_array tex, - int2 coord, - S data) -{ - return tex.texture->atomic_fetch_add(uint(coord.x), uint(coord.y), vec(data)).x; -} - -template -S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_2d tex, - int2 coord, - S data) -{ - return tex.texture->atomic_fetch_add(uint2(coord.xy), vec(data)).x; -} - -template -S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_2d_array tex, - int3 coord, - S data) -{ - return tex.texture->atomic_fetch_add(uint2(coord.xy), uint(coord.z), vec(data)).x; -} - -template -S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_3d tex, - int3 coord, - S data) -{ - return tex.texture->atomic_fetch_add(uint3(coord), vec(data)).x; -} - -/* Atomic Exchange. */ -template -S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d tex, - int coord, - S data) -{ - return tex.texture->atomic_exchange(uint(coord), vec(data)).x; -} - -template -S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d_array tex, - int2 coord, - S data) -{ - return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), vec(data)).x; -} - -template -S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d tex, - int2 coord, - S data) -{ - return tex.texture->atomic_exchange(uint2(coord.xy), vec(data)).x; -} - -template -S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d_array tex, - int3 coord, - S data) -{ - return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), vec(data)).x; -} - -template -S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_3d tex, - int3 coord, - S data) -{ - return tex.texture->atomic_exchange(uint3(coord), vec(data)).x; -} - -#else - -/** - * Texture atomic fallback function entry points. - * NOTE: When texture atomics are unsupported, the wrapped type contains a buffer-backed 2D - * texture. Atomic operations happen directly on the underlying buffer, and texture coordinates are - * remapped into 2D texture space from 2D Array or 3D texture coordinates. - */ - -/* Image atomic operations. */ -# define imageAtomicMin(tex, coord, data) \ - _texture_image_atomic_min_internal_fallback(tex, coord, data) -# define imageAtomicAdd(tex, coord, data) \ - _texture_image_atomic_add_internal_fallack(tex, coord, data) -# define imageAtomicExchange(tex, coord, data) \ - _texture_image_atomic_exchange_internal_fallack(tex, coord, data) -# define imageAtomicXor(tex, coord, data) \ - _texture_image_atomic_xor_internal_fallack(tex, coord, data) -# define imageAtomicOr(tex, coord, data) \ - _texture_image_atomic_or_internal_fallack(tex, coord, data) - -/** Pixel address location remapping. */ - -/* Map 2D/3D texture coordinate into a linear pixel ID. 
*/ -template -uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_atomic_fallback tex, - uint2 coord) -{ - return (coord.x + coord.y * uint(tex.texture->get_width())); -} -template -uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_atomic_fallback tex, - int2 coord) -{ - return tex_coord_to_linear_px(tex, uint2(coord)); -} - -template -uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, - uint3 coord) -{ - return (coord.x + coord.y * tex.texture_size.x + - coord.z * (tex.texture_size.x * tex.texture_size.y)); -} -template -uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, - int3 coord) -{ - return tex_coord_to_linear_px(tex, uint3(coord)); -} - -template -uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_3d_atomic_fallback tex, - uint3 coord) -{ - return (coord.x + coord.y * tex.texture_size.x + - coord.z * (tex.texture_size.x * tex.texture_size.y)); -} -template -uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_3d_atomic_fallback tex, - int3 coord) -{ - return tex_coord_to_linear_px(tex, uint3(coord)); -} - -/* Map 3D texture coordinate into 2D texture space. */ -template -uint2 tex_coord_3d_to_2d(thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, - uint3 coord) -{ - uint linear_id = tex_coord_to_linear_px(tex, coord); - uint tex_full_w = uint(tex.texture->get_width()); - uint2 out_2dcoord; - out_2dcoord.y = linear_id / tex_full_w; - out_2dcoord.x = linear_id - (out_2dcoord.y * tex_full_w); - return out_2dcoord; -} - -template -uint2 tex_coord_3d_to_2d(thread _mtl_combined_image_sampler_3d_atomic_fallback tex, - uint3 coord) -{ - uint linear_id = tex_coord_to_linear_px(tex, coord); - uint tex_full_w = uint(tex.texture->get_width()); - uint2 out_2dcoord; - out_2dcoord.y = linear_id / tex_full_w; - out_2dcoord.x = linear_id - (out_2dcoord.y * tex_full_w); - return out_2dcoord; -} - -template bool in_range(vec value, vec min, vec max) -{ - return (all(value >= min) && all(value < max)); -} - -/* Map 2D/3D texture coordinate into buffer index, accounting for padded row widths. */ -template -uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_2d_atomic_fallback tex, - uint2 coord) -{ - return (coord.x + coord.y * uint(tex.aligned_width)); -} -template -uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_2d_atomic_fallback tex, - int2 coord) -{ - return tex_coord_to_linear_buffer_id(tex, uint2(coord)); -} - -template -uint tex_coord_to_linear_buffer_id( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, uint3 coord) -{ - uint2 coord2d = tex_coord_3d_to_2d(tex, coord); - return (coord2d.x + coord2d.y * uint(tex.aligned_width)); -} -template -uint tex_coord_to_linear_buffer_id( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, int3 coord) -{ - return tex_coord_to_linear_buffer_id(tex, uint3(coord)); -} - -template -uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_3d_atomic_fallback tex, - uint3 coord) -{ - uint2 coord2d = tex_coord_3d_to_2d(tex, coord); - return (coord2d.x + coord2d.y * uint(tex.aligned_width)); -} -template -uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_3d_atomic_fallback tex, - int3 coord) -{ - return tex_coord_to_linear_buffer_id(tex, uint3(coord)); -} - -/* imageAtomicMin. 
*/ - -template -S _texture_image_atomic_min_internal_fallback( - thread _mtl_combined_image_sampler_2d_atomic_fallback tex, int2 coord, S data) -{ - if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0)))) - { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicMin(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_min_internal_fallback( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicMin(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_min_internal_fallback( - thread _mtl_combined_image_sampler_3d_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicMin(tex.buffer[linear_id], data); -} - -/* imageAtomicAdd. */ -template -S _texture_image_atomic_add_internal_fallack( - thread _mtl_combined_image_sampler_2d_atomic_fallback tex, int2 coord, S data) -{ - if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0)))) - { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicAdd(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_add_internal_fallack( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicAdd(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_add_internal_fallack( - thread _mtl_combined_image_sampler_3d_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicAdd(tex.buffer[linear_id], data); -} - -/* imageAtomicExchange. */ -template -S _texture_image_atomic_exchange_internal_fallack( - thread _mtl_combined_image_sampler_2d_atomic_fallback tex, int2 coord, S data) -{ - if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0)))) - { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicExchange(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_exchange_internal_fallack( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicExchange(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_exchange_internal_fallack( - thread _mtl_combined_image_sampler_3d_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicExchange(tex.buffer[linear_id], data); -} - -/* imageAtomicXor. 
*/ -template -S _texture_image_atomic_xor_internal_fallack( - thread _mtl_combined_image_sampler_2d_atomic_fallback tex, int2 coord, S data) -{ - if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0)))) - { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicXor(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_xor_internal_fallack( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicXor(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_xor_internal_fallack( - thread _mtl_combined_image_sampler_3d_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicXor(tex.buffer[linear_id], data); -} - -/* imageAtomicOr. */ -template -S _texture_image_atomic_or_internal_fallack( - thread _mtl_combined_image_sampler_2d_atomic_fallback tex, int2 coord, S data) -{ - if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0)))) - { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicOr(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_or_internal_fallack( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicOr(tex.buffer[linear_id], data); -} - -template -S _texture_image_atomic_or_internal_fallack( - thread _mtl_combined_image_sampler_3d_atomic_fallback tex, int3 coord, S data) -{ - if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) { - return S(0); - } - uint linear_id = tex_coord_to_linear_buffer_id(tex, coord); - return atomicOr(tex.buffer[linear_id], data); -} - -/** Texture sampling, reading and writing functions with layer mapping. */ - -/* Texel Fetch. 
*/ -template -inline vec _texelFetch_internal( - thread _mtl_combined_image_sampler_2d_atomic_fallback tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod)); -} - -template -inline vec _texelFetch_internal( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->sample( - _point_sample_, float2(tex_coord_3d_to_2d(tex, uint3(texel + offset))), level(lod)); -} - -template -inline vec _texelFetch_internal( - thread _mtl_combined_image_sampler_3d_atomic_fallback tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->sample( - _point_sample_, float2(tex_coord_3d_to_2d(tex, uint3(texel + offset))), level(lod)); -} - -template -inline vec _texelFetch_internal( - thread _mtl_combined_image_sampler_2d_atomic_fallback tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - - float w = tex.texture->get_width() >> lod; - float h = tex.texture->get_height() >> lod; - if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 && - (texel.y + offset.y) < h) - { - return tex.texture->read(uint2(texel + offset), lod); - } - else { - return vec(0); - } -} - -template -inline vec _texelFetch_internal( - thread _mtl_combined_image_sampler_2d_array_atomic_fallback tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->read(tex_coord_3d_to_2d(tex, uint3(texel + offset)), lod); -} - -template -inline vec _texelFetch_internal( - thread _mtl_combined_image_sampler_3d_atomic_fallback tex, - vec texel, - uint lod, - vec offset = vec(0)) -{ - return tex.texture->read(tex_coord_3d_to_2d(tex, uint3(texel + offset)), lod); -} - -/* imageStore. 
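 *
 * Both the fetches above and the stores below route 2D array / 3D coordinates through
 * tex_coord_3d_to_2d(). A worked example (illustrative numbers): with a logical size of
 * 4x4x2 and a backing 2D texture of width 8, texel (1, 2, 1) maps to
 *
 *   linear = 1 + 2 * 4 + 1 * (4 * 4) = 25
 *   coord_2d = (25 % 8, 25 / 8) = (1, 3)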
 */
-
-template<typename S, typename T, access A>
-inline void _texture_write_internal(
-    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
-{
-  float w = tex.texture_size.x;
-  float h = tex.texture_size.y;
-  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) {
-    tex.texture->write(value, uint2(_coord.xy));
-  }
-}
-
-template<typename S, typename T, access A>
-inline void _texture_write_internal_fast(
-    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
-{
-  tex.texture->write(value, uint2(_coord.xy));
-}
-
-template<typename S, typename T, access A>
-inline void _texture_write_internal(
-    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
-    T _coord,
-    vec<S, 4> value)
-{
-  float w = tex.texture_size.x;
-  float h = tex.texture_size.y;
-  float d = tex.texture_size.z;
-  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
-      _coord.z < d)
-  {
-    tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
-  }
-}
-
-template<typename S, typename T, access A>
-inline void _texture_write_internal_fast(
-    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
-    T _coord,
-    vec<S, 4> value)
-{
-  tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
-}
-
-template<typename S, typename T, access A>
-inline void _texture_write_internal(
-    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
-{
-  float w = tex.texture_size.x;
-  float h = tex.texture_size.y;
-  float d = tex.texture_size.z;
-  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
-      _coord.z < d)
-  {
-    tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
-  }
-}
-
-template<typename S, typename T, access A>
-inline void _texture_write_internal_fast(
-    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
-{
-  tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
-}
-
-#endif
-
 /* Matrix compare operators. */
 
-/** TODO(fclem): Template. */
-inline bool operator==(float4x4 a, float4x4 b)
-{
-  for (int i = 0; i < 4; i++) {
-    if (any(a[i] != b[i])) {
-      return false;
-    }
+#define EQ_OP(type, ...) \
+  inline bool operator==(type a, type b) \
+  { \
+    return __VA_ARGS__; \
   }
-  return true;
-}
-inline bool operator==(float3x3 a, float3x3 b)
-{
-  for (int i = 0; i < 3; i++) {
-    if (any(a[i] != b[i])) {
-      return false;
-    }
-  }
-  return true;
-}
-inline bool operator==(float2x2 a, float2x2 b)
-{
-  for (int i = 0; i < 2; i++) {
-    if (any(a[i] != b[i])) {
-      return false;
-    }
-  }
-  return true;
-}
+EQ_OP(float2x2, all(a[0] == b[0]) && all(a[1] == b[1]))
+EQ_OP(float2x3, all(a[0] == b[0]) && all(a[1] == b[1]))
+EQ_OP(float2x4, all(a[0] == b[0]) && all(a[1] == b[1]))
+EQ_OP(float3x2, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
+EQ_OP(float3x3, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
+EQ_OP(float3x4, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
+EQ_OP(float4x2, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
+EQ_OP(float4x3, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
+EQ_OP(float4x4, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
+#undef EQ_OP
 
-inline bool operator!=(float4x4 a, float4x4 b)
-{
-  return !(a == b);
-}
-inline bool operator!=(float3x3 a, float3x3 b)
-{
-  return !(a == b);
-}
-inline bool operator!=(float2x2 a, float2x2 b)
+template<typename T, int C, int R> inline bool operator!=(matrix<T, C, R> a, matrix<T, C, R> b)
 {
   return !(a == b);
 }
 
 /* Matrix unary minus operator. */
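 /* For reference, the EQ_OP instantiations above expand to plain per-column comparisons.
  * E.g. EQ_OP(float2x2, ...) yields (sketch of the preprocessor output):
  *
  *   inline bool operator==(float2x2 a, float2x2 b)
  *   {
  *     return all(a[0] == b[0]) && all(a[1] == b[1]);
  *   }
  *
  * Each `a[i] == b[i]` yields a boolean vector, which all() folds to a single bool, so the
  * generic operator!= above can simply negate the result. */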
-
-inline float4x4 operator-(float4x4 a)
+template<typename T, int C, int R> inline matrix<T, C, R> operator-(matrix<T, C, R> a)
 {
-  float4x4 b;
-  for (int i = 0; i < 4; i++) {
-    b[i] = -a[i];
-  }
-  return b;
-}
-inline float3x3 operator-(float3x3 a)
-{
-  float3x3 b;
-  for (int i = 0; i < 3; i++) {
-    b[i] = -a[i];
-  }
-  return b;
-}
-inline float2x2 operator-(float2x2 a)
-{
-  float2x2 b;
-  for (int i = 0; i < 2; i++) {
-    b[i] = -a[i];
-  }
-  return b;
+  return a * -1.0;
 }
 
 /* SSBO Vertex Fetch Mode. */
@@ -1954,115 +901,29 @@ inline float2x2 operator-(float2x2 a)
 /* Common Functions. */
 #define dFdx(x) dfdx(x)
 #define dFdy(x) dfdy(x)
-#define mod(x, y) _mtlmod(x, y)
 #define discard discard_fragment()
 #define inversesqrt rsqrt
 
-inline float radians(float deg)
-{
-  /* Constant factor: M_PI_F/180.0. */
-  return deg * 0.01745329251f;
-}
-
-inline float degrees(float rad)
-{
-  /* Constant factor: 180.0/M_PI_F. */
-  return rad * 57.2957795131;
-}
-
-#define select(A, B, C) mix(A, B, C)
-
-/* Type conversions and type truncation. */
-inline float4 to_float4(float3 val)
-{
-  return float4(val, 1.0);
-}
-
-/* Type conversions and type truncation (Utility Functions). */
-inline float3x3 mat4_to_mat3(float4x4 matrix)
-{
-  return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz);
-}
-
-inline int floatBitsToInt(float f)
-{
-  return as_type<int>(f);
-}
-
-inline int2 floatBitsToInt(float2 f)
-{
-  return as_type<int2>(f);
-}
-
-inline int3 floatBitsToInt(float3 f)
-{
-  return as_type<int3>(f);
-}
-
-inline int4 floatBitsToInt(float4 f)
-{
-  return as_type<int4>(f);
-}
-
-inline uint floatBitsToUint(float f)
-{
-  return as_type<uint>(f);
-}
-
-inline uint2 floatBitsToUint(float2 f)
-{
-  return as_type<uint2>(f);
-}
-
-inline uint3 floatBitsToUint(float3 f)
-{
-  return as_type<uint3>(f);
-}
-
-inline uint4 floatBitsToUint(float4 f)
-{
-  return as_type<uint4>(f);
-}
-
-inline float intBitsToFloat(int f)
-{
-  return as_type<float>(f);
-}
-
-inline float2 intBitsToFloat(int2 f)
-{
-  return as_type<float2>(f);
-}
-
-inline float3 intBitsToFloat(int3 f)
-{
-  return as_type<float3>(f);
-}
-
-inline float4 intBitsToFloat(int4 f)
-{
-  return as_type<float4>(f);
-}
-
-inline float uintBitsToFloat(uint f)
-{
-  return as_type<float>(f);
-}
-
-inline float2 uintBitsToFloat(uint2 f)
-{
-  return as_type<float2>(f);
-}
-
-inline float3 uintBitsToFloat(uint3 f)
-{
-  return as_type<float3>(f);
-}
-
-inline float4 uintBitsToFloat(uint4 f)
-{
-  return as_type<float4>(f);
-}
+/* clang-format off */
+inline float radians(float deg) { return deg * 0.01745329251; /* M_PI_F / 180 */ }
+inline float degrees(float rad) { return rad * 57.2957795131; /* 180 / M_PI_F */ }
+inline int floatBitsToInt(float f) { return as_type<int>(f); }
+inline int2 floatBitsToInt(float2 f) { return as_type<int2>(f); }
+inline int3 floatBitsToInt(float3 f) { return as_type<int3>(f); }
+inline int4 floatBitsToInt(float4 f) { return as_type<int4>(f); }
+inline uint floatBitsToUint(float f) { return as_type<uint>(f); }
+inline uint2 floatBitsToUint(float2 f) { return as_type<uint2>(f); }
+inline uint3 floatBitsToUint(float3 f) { return as_type<uint3>(f); }
+inline uint4 floatBitsToUint(float4 f) { return as_type<uint4>(f); }
+inline float intBitsToFloat(int f) { return as_type<float>(f); }
+inline float2 intBitsToFloat(int2 f) { return as_type<float2>(f); }
+inline float3 intBitsToFloat(int3 f) { return as_type<float3>(f); }
+inline float4 intBitsToFloat(int4 f) { return as_type<float4>(f); }
+inline float uintBitsToFloat(uint f) { return as_type<float>(f); }
+inline float2 uintBitsToFloat(uint2 f) { return as_type<float2>(f); }
+inline float3 uintBitsToFloat(uint3 f) { return as_type<float3>(f); }
+inline float4 uintBitsToFloat(uint4 f) { return as_type<float4>(f); }
+/* clang-format on */
 
 #define bitfieldReverse reverse_bits
 #define bitfieldExtract extract_bits
@@ -2093,89 +954,32 @@ template<typename T> int findMSB(T x)
   return int(sizeof(T) * 8) - 1 - int(clz(x));
 }
 
-#define unpackUnorm4x8 unpack_unorm4x8_to_float
-#define unpackSnorm4x8 unpack_snorm4x8_to_float
-#define unpackUnorm2x16 unpack_unorm2x16_to_float
-#define unpackSnorm2x16 unpack_snorm2x16_to_float
-
-/* Texture size functions. Add texture types as needed. */
-#define imageSize(image) textureSize(image, 0)
-
-template<typename T, access A>
-int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod)
-{
-  return int(image.texture->get_width());
-}
-
-template<typename T, access A>
-int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod)
-{
-  return int2(image.texture->get_width(), image.texture->get_array_size());
-}
-
-template<typename T, access A>
-int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod)
-{
-  return int2(image.texture->get_width(lod), image.texture->get_height(lod));
-}
-
-template<typename T, access A>
-int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod)
-{
-  return int2(image.texture->get_width(lod), image.texture->get_height(lod));
-}
-
-template<typename T, access A>
-int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod)
-{
-  return int3(image.texture->get_width(lod),
-              image.texture->get_height(lod),
-              image.texture->get_array_size());
-}
-
-template<typename T, access A>
-int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod)
-{
-  return int3(image.texture->get_width(lod),
-              image.texture->get_height(lod),
-              image.texture->get_array_size());
-}
-
-template<typename T, access A>
-int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod)
-{
-  return int2(image.texture->get_width(lod), image.texture->get_height(lod));
-}
-
-template<typename T, access A>
-int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod)
-{
-  return int3(image.texture->get_width(lod),
-              image.texture->get_height(lod),
-              image.texture->get_depth(lod));
-}
-
 #ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
 /* textureSize functions for fallback atomic textures. */
 template<typename T, access A>
-int2 textureSize(thread _mtl_combined_image_sampler_2d_atomic_fallback<T, A> image, uint lod)
+int2 textureSize(thread _mtl_sampler_2d_atomic<T, A> image, uint lod)
 {
   return int2(image.texture->get_width(lod), image.texture->get_height(lod));
 }
 template<typename T, access A>
-int3 textureSize(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<T, A> image, uint lod)
+int3 textureSize(thread _mtl_sampler_2d_array_atomic<T, A> image, uint lod)
 {
   return int3(image.texture_size);
 }
 template<typename T, access A>
-int3 textureSize(thread _mtl_combined_image_sampler_3d_atomic_fallback<T, A> image, uint lod)
+int3 textureSize(thread _mtl_sampler_3d_atomic<T, A> image, uint lod)
 {
   return int3(image.texture_size);
 }
 #endif
 
+#define unpackUnorm4x8 unpack_unorm4x8_to_float
+#define unpackSnorm4x8 unpack_snorm4x8_to_float
+#define unpackUnorm2x16 unpack_unorm2x16_to_float
+#define unpackSnorm2x16 unpack_snorm2x16_to_float
+
 /* Equality and comparison functions. */
 #define lessThan(a, b) ((a) < (b))
 #define lessThanEqual(a, b) ((a) <= (b))
@@ -2184,49 +988,18 @@ int3 textureSize(thread _mtl_combined_image_sampler_3d_atomic_fallback ima
 #define equal(a, b) ((a) == (b))
 #define notEqual(a, b) ((a) != (b))
 
-template<typename T, int n> bool all(vec<T, n> x)
-{
-  bool _all = true;
-  for (int i = 0; i < n; i++) {
-    _all = _all && (x[i] > 0);
-  }
-  return _all;
-}
-
-template<typename T, int n> bool any(vec<T, n> x)
-{
-  bool _any = false;
-  for (int i = 0; i < n; i++) {
-    _any = _any || (x[i] > 0);
-  }
-  return _any;
-}
 
 /* Modulo functionality. */
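 /* The GLSL/MSL difference only matters when the operands have opposite signs.
  * For example, with x = -1.5 and y = 2.0:
  *
  *   GLSL mod:  -1.5 - 2.0 * floor(-1.5 / 2.0) = -1.5 - 2.0 * -1.0 =  0.5
  *   MSL fmod:  -1.5 - 2.0 * trunc(-1.5 / 2.0) = -1.5 - 2.0 *  0.0 = -1.5
  *
  * which is why the override below cannot simply forward to fmod(). */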
-int _mtlmod(int a, int b)
-{
-  return a - b * (a / b);
-}
-
-float _mtlmod(float a, float b)
-{
-  return a - b * floor(a / b);
-}
-
-template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y)
-{
-  return x - (y * floor(x / y));
-}
-
-template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y)
-{
-  return x - (vec<T, n>(y) * floor(x / vec<T, n>(y)));
-}
-
-template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y)
-{
-  return vec<U, n>(x) - (y * floor(vec<U, n>(x) / y));
-}
+/* GLSL defines `mod(x, y)` as `x - y * floor(x / y)`. Metal's `fmod` instead truncates
+ * toward zero (`x - y * trunc(x / y)`), which disagrees for operands of opposite signs.
+ * So we override `mod` with a GLSL-compatible implementation. */
+#define mod(x, y) _compatible_mod(x, y)
+#define MOD \
+  { \
+    return x - y * floor(x / y); \
+  }
+float _compatible_mod(float x, float y) MOD;
+template<typename T, int D> vec<T, D> _compatible_mod(vec<T, D> x, float y) MOD;
+template<typename T, int D> vec<T, D> _compatible_mod(vec<T, D> x, vec<T, D> y) MOD;
+#undef MOD
 
 /* Mathematical functions. */
 template<typename T> T atan(T y, T x)
@@ -2249,10 +1022,7 @@ float4x4 inverse(float4x4 a)
   float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1];
   float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1];
   float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2];
-
-  float inv_det = 1.0 / (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
-
-  float4x4 adjoint{};
+  float4x4 adjoint;
   adjoint[0][0] = a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09;
   adjoint[0][1] = a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09;
   adjoint[0][2] = a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03;
@@ -2269,40 +1039,38 @@ float4x4 inverse(float4x4 a)
   adjoint[3][1] = a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06;
   adjoint[3][2] = a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00;
   adjoint[3][3] = a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00;
-  return adjoint * inv_det;
+  float determinant = b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06;
+  /* Multiplying by inverse since matrix types don't have divide operators. */
+  return adjoint * (1.0 / determinant);
 }
 
 float3x3 inverse(float3x3 m)
 {
-  float b00 = m[1][1] * m[2][2] - m[2][1] * m[1][2];
-  float b01 = m[0][1] * m[2][2] - m[2][1] * m[0][2];
-  float b02 = m[0][1] * m[1][2] - m[1][1] * m[0][2];
-
-  float inv_det = 1.0 / (m[0][0] * b00 - m[1][0] * b01 + m[2][0] * b02);
-
-  float3x3 adjoint{};
-  adjoint[0][0] = +b00;
-  adjoint[0][1] = -b01;
-  adjoint[0][2] = +b02;
+  float3x3 adjoint;
+  adjoint[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]);
+  adjoint[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]);
+  adjoint[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]);
   adjoint[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]);
   adjoint[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]);
   adjoint[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]);
   adjoint[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]);
   adjoint[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]);
   adjoint[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]);
-  return adjoint * inv_det;
+  float determinant = m[0][0] * adjoint[0][0] + m[1][0] * adjoint[0][1] + m[2][0] * adjoint[0][2];
+  /* Multiplying by inverse since matrix types don't have divide operators. */
+  return adjoint * (1.0 / determinant);
 }
 
 float2x2 inverse(float2x2 m)
 {
-  float inv_det = 1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]);
-
-  float2x2 adjoint{};
+  float2x2 adjoint;
   adjoint[0][0] = +m[1][1];
   adjoint[1][0] = -m[1][0];
   adjoint[0][1] = -m[0][1];
   adjoint[1][1] = +m[0][0];
-  return adjoint * inv_det;
+  float determinant = m[0][0] * m[1][1] - m[1][0] * m[0][1];
+  /* Multiplying by inverse since matrix types don't have divide operators. */
+  return adjoint * (1.0 / determinant);
 }
 
 /* Additional overloads for builtin functions. */
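 /* Sanity check for the adjugate form used by inverse() above, on a 2x2 example with
  * columns m[0] = (4, 0) and m[1] = (0, 2): determinant = 4 * 2 - 0 * 0 = 8, the adjoint
  * has columns (2, 0) and (0, 4), so inverse = adjoint * (1.0 / 8.0), i.e. columns
  * (0.25, 0) and (0, 0.5), and m * inverse(m) gives the identity as expected. */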
@@ -2311,226 +1079,70 @@ float distance(float x, float y)
   return abs(y - x);
 }
 
-/* Overload for mix(A, B, float ratio). */
-template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val)
+/* Overload for mix(vec, vec, float). */
+template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float fac)
 {
-  return mix(a, b, vec<T, Size>(val));
-}
-
-/* Overload for mix(A, B, bvec). */
-template<typename T, int Size>
-vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<bool, Size> mask)
-{
-  vec<T, Size> result;
-  for (int i = 0; i < Size; i++) {
-    result[i] = mask[i] ? b[i] : a[i];
-  }
-  return result;
+  return a * (1.0 - fac) + b * fac;
 }
 
 /* Using vec<bool, Size> does not appear to work, splitting cases. */
-/* Overload for mix(A, B, bvec). */
-template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask)
-{
-  vec<T, 4> result;
-  for (int i = 0; i < 4; i++) {
-    result[i] = mask[i] ? b[i] : a[i];
+#define SELECT \
+  { \
+    return select(a, b, mask); \
   }
-  return result;
-}
 
-/* Overload for mix(A, B, bvec). */
-template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask)
-{
-  vec<T, 3> result;
-  for (int i = 0; i < 3; i++) {
-    result[i] = mask[i] ? b[i] : a[i];
-  }
-  return result;
-}
+template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bool4 mask) SELECT;
+template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bool3 mask) SELECT;
+template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bool2 mask) SELECT;
 
-/* Overload for mix(A, B, bvec). */
-template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask)
-{
-  vec<T, 2> result;
-  for (int i = 0; i < 2; i++) {
-    result[i] = mask[i] ? b[i] : a[i];
-  }
-  return result;
-}
-
-/* Overload for mix(A, B, bvec). */
-template<typename T> T mix(T a, T b, bool mask)
-{
-  return (mask) ? b : a;
-}
+#undef SELECT
 
 template<typename T, int Size> bool is_zero(vec<T, Size> a)
 {
-  for (int i = 0; i < Size; i++) {
-    if (a[i] != T(0)) {
-      return false;
-    }
+  return all(a == vec<T, Size>(0));
+}
+
+#define in
+#define out thread
+#define inout thread
+#define _in_sta
+#define _in_end
+#define _out_sta (&
+#define _out_end )
+#define _inout_sta (&
+#define _inout_end )
+
+#define shared threadgroup
+#define _shared_sta (&
+#define _shared_end )
+
+/* Defines generated by glsl_preprocess that contain the codegen related to threadgroup
+ * variables. See glsl_preprocess for more details. */
+#define MSL_SHARED_VARS_ARGS
+#define MSL_SHARED_VARS_ASSIGN
+#define MSL_SHARED_VARS_DECLARE
+#define MSL_SHARED_VARS_PASS
+
+/* Matrix reshaping functions. */
+#define RESHAPE(mat_to, mat_from, ...) \
+  mat_to to_##mat_to(mat_from m) \
+  { \
+    return mat_to(__VA_ARGS__); \
   }
-  return true;
-}
-
-/**
- * Matrix conversion fallback for functional style casting & constructors.
- * To avoid name collision with the types, they are replaced with uppercase version
- * before compilation.
- */
+/* clang-format off */
+RESHAPE(float2x2, float3x3, m[0].xy, m[1].xy)
+RESHAPE(float2x2, float4x4, m[0].xy, m[1].xy)
+RESHAPE(float3x3, float4x4, m[0].xyz, m[1].xyz, m[2].xyz)
+RESHAPE(float3x3, float2x2, m[0].x, m[0].y, 0, m[1].x, m[1].y, 0, 0, 0, 1)
+RESHAPE(float4x4, float2x2, m[0].x, m[0].y, 0, 0, m[1].x, m[1].y, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1)
+RESHAPE(float4x4, float3x3, m[0].x, m[0].y, m[0].z, 0, m[1].x, m[1].y, m[1].z, 0, m[2].x, m[2].y, m[2].z, 0, 0, 0, 0, 1)
+/* clang-format on */
+/* TODO(fclem): Remove. Use Transform instead. */
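 /* For reference, RESHAPE(float3x3, float4x4, m[0].xyz, m[1].xyz, m[2].xyz) above expands
  * to (sketch of the preprocessor output):
  *
  *   float3x3 to_float3x3(float4x4 m)
  *   {
  *     return float3x3(m[0].xyz, m[1].xyz, m[2].xyz);
  *   }
  *
  * Extraction keeps the upper-left block, while extension (e.g. float4x4 from float2x2)
  * pads with identity rows/columns, matching GLSL's matNxM(matrix) constructor rules. */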
+RESHAPE(float3x3, float3x4, m[0].xyz, m[1].xyz, m[2].xyz)
+#undef RESHAPE
 
-mat2 MAT2x2(vec2 a, vec2 b)
-{
-  return mat2(a, b);
-}
-mat2 MAT2x2(float a1, float a2, float b1, float b2)
-{
-  return mat2(vec2(a1, a2), vec2(b1, b2));
-}
-mat2 MAT2x2(float f)
-{
-  return mat2(f);
-}
-mat2 MAT2x2(mat3 m)
-{
-  return mat2(m[0].xy, m[1].xy);
-}
-mat2 MAT2x2(mat4 m)
-{
-  return mat2(m[0].xy, m[1].xy);
-}
-
-mat3 MAT3x3(vec3 a, vec3 b, vec3 c)
-{
-  return mat3(a, b, c);
-}
-mat3 MAT3x3(
-    float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3)
-{
-  return mat3(vec3(a1, a2, a3), vec3(b1, b2, b3), vec3(c1, c2, c3));
-}
-mat3 MAT3x3(vec3 a, float b1, float b2, float b3, float c1, float c2, float c3)
-{
-  return mat3(a, vec3(b1, b2, b3), vec3(c1, c2, c3));
-}
-mat3 MAT3x3(float f)
-{
-  return mat3(f);
-}
-mat3 MAT3x3(mat4 m)
-{
-  return mat3(m[0].xyz, m[1].xyz, m[2].xyz);
-}
-mat3 MAT3x3(mat3x4 m)
-{
-  return mat3(m[0].xyz, m[1].xyz, m[2].xyz);
-}
-mat3 MAT3x3(mat2 m)
-{
-  return mat3(vec3(m[0].xy, 0.0), vec3(m[1].xy, 0.0), vec3(0.0, 0.0, 1.0));
-}
-
-mat4 MAT4x4(vec4 a, vec4 b, vec4 c, vec4 d)
-{
-  return mat4(a, b, c, d);
-}
-mat4 MAT4x4(float a1,
-            float a2,
-            float a3,
-            float a4,
-            float b1,
-            float b2,
-            float b3,
-            float b4,
-            float c1,
-            float c2,
-            float c3,
-            float c4,
-            float d1,
-            float d2,
-            float d3,
-            float d4)
-{
-  return mat4(
-      vec4(a1, a2, a3, a4), vec4(b1, b2, b3, b4), vec4(c1, c2, c3, c4), vec4(d1, d2, d3, d4));
-}
-mat4 MAT4x4(float f)
-{
-  return mat4(f);
-}
-mat4 MAT4x4(mat3 m)
-{
-  return mat4(
-      vec4(m[0].xyz, 0.0), vec4(m[1].xyz, 0.0), vec4(m[2].xyz, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
-}
-mat4 MAT4x4(mat3x4 m)
-{
-  return mat4(m[0], m[1], m[2], vec4(0.0, 0.0, 0.0, 1.0));
-}
-mat4 MAT4x4(mat4x3 m)
-{
-  return mat4(m[0][0],
-              m[0][1],
-              m[0][2],
-              0.0,
-              m[1][0],
-              m[1][1],
-              m[1][2],
-              0.0,
-              m[2][0],
-              m[2][1],
-              m[2][2],
-              0.0,
-              m[3][0],
-              m[3][1],
-              m[3][2],
-              0.0);
-}
-mat4 MAT4x4(mat2 m)
-{
-  return mat4(vec4(m[0].xy, 0.0, 0.0),
-              vec4(m[1].xy, 0.0, 0.0),
-              vec4(0.0, 0.0, 1.0, 0.0),
-              vec4(0.0, 0.0, 0.0, 1.0));
-}
-
-mat3x4 MAT3x4(vec4 a, vec4 b, vec4 c)
-{
-  return mat3x4(a, b, c);
-}
-mat3x4 MAT3x4(float a1,
-              float a2,
-              float a3,
-              float a4,
-              float b1,
-              float b2,
-              float b3,
-              float b4,
-              float c1,
-              float c2,
-              float c3,
-              float c4)
-{
-  return mat3x4(vec4(a1, a2, a3, a4), vec4(b1, b2, b3, b4), vec4(c1, c2, c3, c4));
-}
-mat3x4 MAT3x4(float f)
-{
-  return mat3x4(f);
-}
-mat3x4 MAT3x4(mat3 m)
-{
-  return mat3x4(vec4(m[0].xyz, 0.0), vec4(m[1].xyz, 0.0), vec4(m[2].xyz, 0.0));
-}
-mat3x4 MAT3x4(mat2 m)
-{
-  return mat3x4(vec4(m[0].xy, 0.0, 0.0), vec4(m[1].xy, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0));
-}
-
-#define MAT2 MAT2x2
-#define MAT3 MAT3x3
-#define MAT4 MAT4x4
+#undef ENABLE_IF
 
 /* Array syntax compatibility. */
 /* clang-format off */
@@ -2550,6 +1162,8 @@ mat3x4 MAT3x4(mat2 m)
 #define bool2_array(...) { __VA_ARGS__ }
 #define bool3_array(...) { __VA_ARGS__ }
 #define bool4_array(...) { __VA_ARGS__ }
+#define ARRAY_T(type)
+#define ARRAY_V(...) {__VA_ARGS__}
 /* clang-format on */
 
 #define SHADER_LIBRARY_CREATE_INFO(a)
diff --git a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl
index e6f377a4cee..d06f312ed04 100644
--- a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl
+++ b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl
@@ -6,6 +6,26 @@
 /** IMPORTANT: Be wary of size and alignment matching for types that are present
  * in C++ shared code. */
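 /* Cross-backend note: the GLSL to_##name() reshapes below can simply invoke the target
  * constructor, since GLSL's matNxM(matrix) already truncates to the upper-left block or
  * extends with identity. The array macros work the same way; an illustrative call site
  *
  *   vec4 arr[2] = ARRAY_T(vec4) ARRAY_V(a, b);
  *
  * expands here to `vec4 arr[2] = vec4[](a, b);` (ARRAY_V vanishes, keeping the
  * parenthesized list), and on MSL to `vec4 arr[2] = {a, b};`, where ARRAY_T vanishes
  * and ARRAY_V produces a braced initializer list. */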
+/* Matrix reshaping functions. Need to be declared before the matrix type aliases. */
+#define RESHAPE(name, mat_to, mat_from) \
+  mat_to to_##name(mat_from m) \
+  { \
+    return mat_to(m); \
+  }
+
+/* clang-format off */
+RESHAPE(float2x2, mat2x2, mat3x3)
+RESHAPE(float2x2, mat2x2, mat4x4)
+RESHAPE(float3x3, mat3x3, mat4x4)
+RESHAPE(float3x3, mat3x3, mat2x2)
+RESHAPE(float4x4, mat4x4, mat2x2)
+RESHAPE(float4x4, mat4x4, mat3x3)
+/* clang-format on */
+/* TODO(fclem): Remove. Use Transform instead. */
+RESHAPE(float3x3, mat3x3, mat3x4)
+
+#undef RESHAPE
+
 /* Boolean in GLSL are 32bit in interface structs. */
 #define bool32_t bool
 #define bool2 bvec2
@@ -121,8 +141,19 @@ bool is_zero(vec4 A)
 #define bool2_array bool2[]
 #define bool3_array bool3[]
 #define bool4_array bool4[]
+#define ARRAY_T(type) type[]
+#define ARRAY_V
 
 #define SHADER_LIBRARY_CREATE_INFO(a)
 #define VERTEX_SHADER_CREATE_INFO(a)
 #define FRAGMENT_SHADER_CREATE_INFO(a)
 #define COMPUTE_SHADER_CREATE_INFO(a)
+
+#define _in_sta
+#define _in_end
+#define _out_sta
+#define _out_end
+#define _inout_sta
+#define _inout_end
+#define _shared_sta
+#define _shared_end
diff --git a/source/blender/gpu/tests/shaders/gpu_math_test.glsl b/source/blender/gpu/tests/shaders/gpu_math_test.glsl
index faca5a75219..5ca304d055d 100644
--- a/source/blender/gpu/tests/shaders/gpu_math_test.glsl
+++ b/source/blender/gpu/tests/shaders/gpu_math_test.glsl
@@ -70,22 +70,22 @@ void main()
   EulerXYZ euler = EulerXYZ(1, 2, 3);
   Quaternion quat = to_quaternion(euler);
   AxisAngle axis_angle = to_axis_angle(euler);
-  m = mat4(from_rotation(euler));
+  m = to_float4x4(from_rotation(euler));
   EXPECT_NEAR(m, expect, 1e-5);
-  m = mat4(from_rotation(quat));
+  m = to_float4x4(from_rotation(quat));
   EXPECT_NEAR(m, expect, 1e-5);
-  m = mat4(from_rotation(axis_angle));
+  m = to_float4x4(from_rotation(axis_angle));
   EXPECT_NEAR(m, expect, 3e-4); /* Has some precision issue on some platform. */
 
   m = from_scale(vec4(1, 2, 3, 4));
   expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 3, 0), vec4(0, 0, 0, 4));
   EXPECT_TRUE(is_equal(m, expect, 0.00001));
 
-  m = mat4(from_scale(vec3(1, 2, 3)));
+  m = to_float4x4(from_scale(vec3(1, 2, 3)));
   expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 3, 0), vec4(0, 0, 0, 1));
   EXPECT_TRUE(is_equal(m, expect, 0.00001));
 
-  m = mat4(from_scale(vec2(1, 2)));
+  m = to_float4x4(from_scale(vec2(1, 2)));
   expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 1, 0), vec4(0, 0, 0, 1));
   EXPECT_TRUE(is_equal(m, expect, 0.00001));
 
@@ -182,8 +182,8 @@ void main()
   EulerXYZ eul;
   Quaternion qt;
   vec3 scale;
-  to_rot_scale(mat3x3(m), eul, scale);
-  to_rot_scale(mat3x3(m), qt, scale);
+  to_rot_scale(to_float3x3(m), eul, scale);
+  to_rot_scale(to_float3x3(m), qt, scale);
   EXPECT_NEAR(scale, expect_scale, 0.00001);
   EXPECT_NEAR(as_vec4(qt), as_vec4(expect_qt), 0.0002);
   EXPECT_NEAR(as_vec3(eul), as_vec3(expect_eul), 0.0002);
diff --git a/source/blender/gpu/vulkan/vk_device.cc b/source/blender/gpu/vulkan/vk_device.cc
index 96501ea4298..6d4a1df4e32 100644
--- a/source/blender/gpu/vulkan/vk_device.cc
+++ b/source/blender/gpu/vulkan/vk_device.cc
@@ -236,9 +236,6 @@ void VKDevice::init_glsl_patch()
     ss << "#define gpu_BaryCoordNoPersp gl_BaryCoordNoPerspEXT\n";
   }
 
-  ss << "#define DFDX_SIGN 1.0\n";
-  ss << "#define DFDY_SIGN 1.0\n";
-
   /* GLSL Backend Lib. */
   ss << datatoc_glsl_shader_defines_glsl;
 
   glsl_patch_ = ss.str();
diff --git a/source/blender/python/gpu/gpu_py_shader.cc b/source/blender/python/gpu/gpu_py_shader.cc
index 08f2e4e9ac4..c54f997c405 100644
--- a/source/blender/python/gpu/gpu_py_shader.cc
+++ b/source/blender/python/gpu/gpu_py_shader.cc
@@ -1009,7 +1009,7 @@ static PyObject *pygpu_shader_create_from_info(BPyGPUShader * /*self*/, BPyGPUSh
     return nullptr;
   }
 
-  GPUShader *shader = GPU_shader_create_from_info(o->info);
+  GPUShader *shader = GPU_shader_create_from_info_python(o->info);
   if (!shader) {
     PyErr_SetString(PyExc_Exception, "Shader Compile Error, see console for more details");
     return nullptr;