Metal: Simplify MSL translation

Move most of the string preprocessing used for MSL
compatibility to `glsl_preprocess`.

Enforce some changes in the GLSL codebase, such as the matrix and
array constructor syntax, for C++ compatibility.

Additionally reduce the amount of code duplication
inside the compatibility code.

Pull Request: https://projects.blender.org/blender/blender/pulls/128634
Authored by Clément Foucault on 2024-10-07 12:54:10 +02:00, committed by Clément Foucault.
Parent 9e604f0e00, commit 9c0321ae9b.
64 changed files with 1519 additions and 2928 deletions.
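
For illustration, the rewrite this enforces can be reproduced with a few lines of standalone C++ using the same array-constructor regex that the new `glsl_preprocess.hh` header applies (see `array_constructor_macro_injection` further down). This is only a sketch: the sample GLSL line is made up, and `ARRAY_T`/`ARRAY_V` are assumed to be resolved by each backend's compatibility headers.

#include <iostream>
#include <regex>
#include <string>

int main()
{
  /* Legacy GLSL-style array constructor; the linter now flags this form and
   * sources are expected to spell it float4_array(...) instead. */
  std::string legacy = "vec4 inputs[2] = vec4[](a, b);";
  /* Same regex as array_constructor_macro_injection() in glsl_preprocess.hh. */
  std::regex regex("=\\s*(\\w+)\\s*\\[[^\\]]*\\]\\s*\\(");
  std::cout << std::regex_replace(legacy, regex, "= ARRAY_T($1) ARRAY_V(") << "\n";
  /* Prints: vec4 inputs[2] = ARRAY_T(vec4) ARRAY_V(a, b); */
  return 0;
}

The matrix-constructor change works the same way at the source level: reshaping constructors such as `mat3(some_mat4)` are spelled `to_float3x3(some_mat4)` so the same expression also compiles as C++/MSL, which is what most of the hunks below do.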


@@ -35,7 +35,7 @@ void main()
* boundary condition, so we initialize all inputs by the boundary pixel. */
ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
vec4 input_boundary = texture_load(input_tx, boundary_texel);
vec4 inputs[FILTER_ORDER + 1] = vec4[](
vec4 inputs[FILTER_ORDER + 1] = float4_array(
input_boundary, input_boundary, input_boundary, input_boundary, input_boundary);
/* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
@@ -44,7 +44,7 @@ void main()
* boundary coefficient. See the DericheGaussianCoefficients class for more information on the
* boundary handing. */
vec4 output_boundary = input_boundary * boundary_coefficient;
vec4 outputs[FILTER_ORDER + 1] = vec4[](
vec4 outputs[FILTER_ORDER + 1] = float4_array(
output_boundary, output_boundary, output_boundary, output_boundary, output_boundary);
for (int x = 0; x < width; x++) {


@@ -16,8 +16,9 @@ void main()
int radius = max(0, size);
#endif
vec4 mean_of_squared_color_of_quadrants[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
vec4 mean_of_color_of_quadrants[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
vec4 mean_of_squared_color_of_quadrants[4] = float4_array(
vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
vec4 mean_of_color_of_quadrants[4] = float4_array(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
/* Compute the above statistics for each of the quadrants around the current pixel. */
for (int q = 0; q < 4; q++) {


@@ -11,7 +11,7 @@ void main()
vec2 coordinates = (vec2(texel) + vec2(0.5)) / output_size;
vec3 transformed_coordinates = mat3(homography_matrix) * vec3(coordinates, 1.0);
vec3 transformed_coordinates = to_float3x3(homography_matrix) * vec3(coordinates, 1.0);
vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z;
/* The derivatives of the projected coordinates with respect to x and y are the first and


@@ -8,7 +8,7 @@ void main()
vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(mask_img));
vec3 transformed_coordinates = mat3(homography_matrix) * vec3(coordinates, 1.0);
vec3 transformed_coordinates = to_float3x3(homography_matrix) * vec3(coordinates, 1.0);
vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z;
bool is_inside_plane = all(greaterThanEqual(projected_coordinates, vec2(0.0))) &&


@@ -13,7 +13,7 @@ void main()
vec4 accumulated_color = vec4(0.0);
for (int i = 0; i < number_of_motion_blur_samples; i++) {
mat3 homography_matrix = mat3(homography_matrices[i]);
mat3 homography_matrix = to_float3x3(homography_matrices[i]);
vec3 transformed_coordinates = homography_matrix * vec3(coordinates, 1.0);
vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z;


@@ -10,7 +10,7 @@ void main()
float accumulated_mask = 0.0;
for (int i = 0; i < number_of_motion_blur_samples; i++) {
mat3 homography_matrix = mat3(homography_matrices[i]);
mat3 homography_matrix = to_float3x3(homography_matrices[i]);
vec3 transformed_coordinates = homography_matrix * vec3(coordinates, 1.0);
vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z;


@@ -15,7 +15,7 @@ void main()
/* Transform the input image by transforming the domain coordinates with the inverse of input
* image's transformation. The inverse transformation is an affine matrix and thus the
* coordinates should be in homogeneous coordinates. */
coordinates = (mat3(inverse_transformation) * vec3(coordinates, 1.0)).xy;
coordinates = (to_float3x3(inverse_transformation) * vec3(coordinates, 1.0)).xy;
/* Subtract the offset and divide by the input image size to get the relevant coordinates into
* the sampler's expected [0, 1] range. */


@@ -56,7 +56,7 @@ void main()
* boundary condition, so we initialize all inputs by the boundary pixel. */
ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
vec4 input_boundary = texture_load(input_tx, boundary_texel);
vec4 inputs[FILTER_ORDER + 1] = vec4[](input_boundary, input_boundary, input_boundary);
vec4 inputs[FILTER_ORDER + 1] = float4_array(input_boundary, input_boundary, input_boundary);
/* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
* current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann
@@ -64,7 +64,7 @@ void main()
* boundary coefficient. See the VanVlietGaussianCoefficients class for more information on the
* boundary handing. */
vec4 output_boundary = input_boundary * boundary_coefficient;
vec4 outputs[FILTER_ORDER + 1] = vec4[](output_boundary, output_boundary, output_boundary);
vec4 outputs[FILTER_ORDER + 1] = float4_array(output_boundary, output_boundary, output_boundary);
for (int x = 0; x < width; x++) {
/* Run forward across rows for the causal filter and backward for the non causal filter. */


@@ -57,8 +57,8 @@ const float dof_layer_offset = 0.5 + 0.5;
const int dof_max_slight_focus_radius = DOF_MAX_SLIGHT_FOCUS_RADIUS;
const uvec2 quad_offsets_u[4] = uvec2[4](uvec2(0, 1), uvec2(1, 1), uvec2(1, 0), uvec2(0, 0));
const vec2 quad_offsets[4] = vec2[4](
const uvec2 quad_offsets_u[4] = uint2_array(uvec2(0, 1), uvec2(1, 1), uvec2(1, 0), uvec2(0, 0));
const vec2 quad_offsets[4] = float2_array(
vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5));
/** \} */


@@ -80,7 +80,7 @@ vec3 dof_neighborhood_clamp(vec2 frag_coord, vec3 color, float center_coc, float
{
/* Stabilize color by clamping with the stable half res neighborhood. */
vec3 neighbor_min, neighbor_max;
const vec2 corners[4] = vec2[4](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));
const vec2 corners[4] = float2_array(vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));
for (int i = 0; i < 4; i++) {
/**
* Visit the 4 half-res texels around (and containing) the full-resolution texel.


@@ -125,7 +125,7 @@ float dof_bilateral_weight(float reference_coc, float sample_coc)
DofSample dof_spatial_filtering()
{
/* Plus (+) shape offsets. */
const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
const ivec2 plus_offsets[4] = int2_array(ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
DofSample center = dof_fetch_input_sample(ivec2(0));
DofSample accum = DofSample(vec4(0.0), 0.0);
float accum_weight = 0.0;
@@ -165,7 +165,7 @@ struct DofNeighborhoodMinMax {
DofNeighborhoodMinMax dof_neighbor_boundbox()
{
/* Plus (+) shape offsets. */
const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
const ivec2 plus_offsets[4] = int2_array(ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
/**
* Simple bounding box calculation in YCoCg as described in:
* "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014
@@ -183,7 +183,7 @@ DofNeighborhoodMinMax dof_neighbor_boundbox()
* Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */
DofSample min_c_3x3 = min_c;
DofSample max_c_3x3 = max_c;
const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
const ivec2 corners[4] = int2_array(ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
for (int i = 0; i < 4; i++) {
DofSample samp = dof_fetch_input_sample(corners[i]);
min_c_3x3.color = min(min_c_3x3.color, samp.color);
@@ -206,7 +206,7 @@ vec2 dof_pixel_history_motion_vector(ivec2 texel_sample)
* Dilate velocity by using the nearest pixel in a cross pattern.
* "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014 (Slide 27)
*/
const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
const ivec2 corners[4] = int2_array(ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
float min_depth = dof_fetch_half_depth(ivec2(0));
ivec2 nearest_texel = ivec2(0);
for (int i = 0; i < 4; i++) {


@@ -9,13 +9,13 @@ void main()
{
/* Constant array moved inside function scope.
* Minimizes local register allocation in MSL. */
const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0),
vec2(1.0, -1.0),
vec2(-1.0, 1.0),
const vec2 pos[6] = float2_array(vec2(-1.0, -1.0),
vec2(1.0, -1.0),
vec2(-1.0, 1.0),
vec2(1.0, -1.0),
vec2(1.0, 1.0),
vec2(-1.0, 1.0));
vec2(1.0, -1.0),
vec2(1.0, 1.0),
vec2(-1.0, 1.0));
vec2 lP = pos[gl_VertexID % 6];
int display_index = gl_VertexID / 6;


@@ -9,13 +9,13 @@ void main()
{
/* Constant array moved inside function scope.
* Minimizes local register allocation in MSL. */
const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0),
vec2(1.0, -1.0),
vec2(-1.0, 1.0),
const vec2 pos[6] = float2_array(vec2(-1.0, -1.0),
vec2(1.0, -1.0),
vec2(-1.0, 1.0),
vec2(1.0, -1.0),
vec2(1.0, 1.0),
vec2(-1.0, 1.0));
vec2(1.0, -1.0),
vec2(1.0, 1.0),
vec2(-1.0, 1.0));
lP = pos[gl_VertexID % 6];
int display_index = gl_VertexID / 6;


@@ -9,13 +9,13 @@ void main()
{
/* Constant array moved inside function scope.
* Minimizes local register allocation in MSL. */
const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0),
vec2(1.0, -1.0),
vec2(-1.0, 1.0),
const vec2 pos[6] = float2_array(vec2(-1.0, -1.0),
vec2(1.0, -1.0),
vec2(-1.0, 1.0),
vec2(1.0, -1.0),
vec2(1.0, 1.0),
vec2(-1.0, 1.0));
vec2(1.0, -1.0),
vec2(1.0, 1.0),
vec2(-1.0, 1.0));
lP = pos[gl_VertexID % 6];
int cell_index = gl_VertexID / 6;


@@ -195,7 +195,7 @@ void film_cryptomatte_layer_accum_and_store(
return;
}
/* x = hash, y = accumulated weight. Only keep track of 4 highest weighted samples. */
vec2 crypto_samples[4] = vec2[4](vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0));
vec2 crypto_samples[4] = float2_array(vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0));
for (int i = 0; i < samples_len; i++) {
FilmSample src = film_sample_get(i, texel_film);
film_sample_cryptomatte_accum(src, layer_component, cryptomatte_tx, crypto_samples);
@@ -246,7 +246,7 @@ vec2 film_pixel_history_motion_vector(ivec2 texel_sample)
* Dilate velocity by using the nearest pixel in a cross pattern.
* "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014 (Slide 27)
*/
const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
const ivec2 corners[4] = int2_array(ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
float min_depth = texelFetch(depth_tx, texel_sample, 0).x;
ivec2 nearest_texel = texel_sample;
for (int i = 0; i < 4; i++) {
@@ -342,11 +342,11 @@ vec4 film_sample_catmull_rom(sampler2D color_tx, vec2 input_texel)
void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c)
{
/* Plus (+) shape offsets. */
const ivec2 plus_offsets[5] = ivec2[5](ivec2(0, 0), /* Center */
ivec2(-1, 0),
ivec2(0, -1),
ivec2(1, 0),
ivec2(0, 1));
const ivec2 plus_offsets[5] = int2_array(ivec2(0, 0), /* Center */
ivec2(-1, 0),
ivec2(0, -1),
ivec2(1, 0),
ivec2(0, 1));
#if 0
/**
* Compute Variance of neighborhood as described in:
@@ -389,7 +389,7 @@ void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c
* Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */
vec4 min_c_3x3 = min_c;
vec4 max_c_3x3 = max_c;
const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
const ivec2 corners[4] = int2_array(ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
for (int i = 0; i < 4; i++) {
vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + corners[i]);
min_c_3x3 = min(min_c_3x3, color);


@@ -220,22 +220,22 @@ void cubeface_sync(int tilemap_id,
/* Use switch instead of inline array of float3x3. */
switch (cubeface) {
case Z_NEG:
viewmat = mat4x4(mat3x3(+1, +0, +0, +0, +1, +0, +0, +0, +1)) * viewmat;
viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, +1, +0, +0, +0, +1)) * viewmat;
break;
case X_POS:
viewmat = mat4x4(mat3x3(+0, +0, -1, -1, +0, +0, +0, +1, +0)) * viewmat;
viewmat = to_float4x4(mat3x3(+0, +0, -1, -1, +0, +0, +0, +1, +0)) * viewmat;
break;
case X_NEG:
viewmat = mat4x4(mat3x3(+0, +0, +1, +1, +0, +0, +0, +1, +0)) * viewmat;
viewmat = to_float4x4(mat3x3(+0, +0, +1, +1, +0, +0, +0, +1, +0)) * viewmat;
break;
case Y_POS:
viewmat = mat4x4(mat3x3(+1, +0, +0, +0, +0, -1, +0, +1, +0)) * viewmat;
viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, +0, -1, +0, +1, +0)) * viewmat;
break;
case Y_NEG:
viewmat = mat4x4(mat3x3(-1, +0, +0, +0, +0, +1, +0, +1, +0)) * viewmat;
viewmat = to_float4x4(mat3x3(-1, +0, +0, +0, +0, +1, +0, +1, +0)) * viewmat;
break;
case Z_POS:
viewmat = mat4x4(mat3x3(+1, +0, +0, +0, -1, +0, +0, +0, -1)) * viewmat;
viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, -1, +0, +0, +0, -1)) * viewmat;
break;
}
@@ -289,7 +289,7 @@ void main()
* set to 0 only when the light radius is also 0 to detect this case. */
}
else {
light.object_to_world = transform_from_matrix(mat4x4(from_up_axis(shadow_direction)));
light.object_to_world = transform_from_matrix(to_float4x4(from_up_axis(shadow_direction)));
}
}


@@ -49,7 +49,7 @@ vec3 lightprobe_sphere_parallax(SphereProbeData probe, vec3 P, vec3 L)
}
/* Correct reflection ray using parallax volume intersection. */
vec3 lP = vec4(P, 1.0) * probe.world_to_probe_transposed;
vec3 lL = (mat3x3(probe.world_to_probe_transposed) * L) / probe.parallax_distance;
vec3 lL = (to_float3x3(probe.world_to_probe_transposed) * L) / probe.parallax_distance;
float dist = (probe.parallax_shape == SHAPE_ELIPSOID) ? line_unit_sphere_intersect_dist(lP, lL) :
line_unit_box_intersect_dist(lP, lL);


@@ -170,7 +170,7 @@ SphericalHarmonicL1 lightprobe_volume_sample(
VolumeProbeData grid_data = grids_infos_buf[index];
mat3x3 world_to_grid_transposed = mat3x3(grid_data.world_to_grid_transposed);
mat3x3 world_to_grid_transposed = to_float3x3(grid_data.world_to_grid_transposed);
vec3 lNg = safe_normalize(Ng * world_to_grid_transposed);
vec3 lV = safe_normalize(V * world_to_grid_transposed);


@@ -104,7 +104,8 @@ void main()
}
/* Rotate Spherical Harmonic into world space. */
mat3 grid_to_world_rot = normalize(mat3(grids_infos_buf[grid_index].world_to_grid_transposed));
mat3 grid_to_world_rot = normalize(
to_float3x3(grids_infos_buf[grid_index].world_to_grid_transposed));
sh_local = spherical_harmonics_rotate(grid_to_world_rot, sh_local);
SphericalHarmonicL1 sh_visibility;


@@ -36,7 +36,7 @@ void main()
sp = bone_mat * sp.xzy + headSphere.xyz;
nor = bone_mat * nor.xzy;
normalView = mat3(drw_view.viewmat) * nor;
normalView = to_float3x3(drw_view.viewmat) * nor;
finalStateColor = stateColor;
finalBoneColor = boneColor;


@@ -34,7 +34,7 @@ VertOut vertex_main(VertIn v_in)
/* This is slow and run per vertex, but it's still faster than
* doing it per instance on CPU and sending it on via instance attribute. */
mat3 normal_mat = transpose(inverse(mat3(model_mat)));
mat3 normal_mat = transpose(inverse(to_float3x3(model_mat)));
VertOut v_out;
v_out.vPos = view_pos.xyz;


@@ -39,7 +39,7 @@ void do_vertex_shader(mat4 in_inst_obmat,
/* This is slow and run per vertex, but it's still faster than
* doing it per instance on CPU and sending it on via instance attribute. */
mat3 normal_mat = transpose(inverse(mat3(model_mat)));
mat3 normal_mat = transpose(inverse(to_float3x3(model_mat)));
out_ssPos = proj(out_pPos);


@@ -15,7 +15,7 @@ void main()
/* This is slow and run per vertex, but it's still faster than
* doing it per instance on CPU and sending it on via instance attribute. */
mat3 normal_mat = transpose(inverse(mat3(model_mat)));
mat3 normal_mat = transpose(inverse(to_float3x3(model_mat)));
vec3 normal = normalize(normal_world_to_view(normal_mat * nor));
inverted = int(dot(cross(model_mat[0].xyz, model_mat[1].xyz), model_mat[2].xyz) < 0.0);


@@ -27,7 +27,7 @@ void main()
* In perspective mode it's also the view-space position
* of the sphere center. */
vec3 cam_ray = (is_persp) ? model_view_matrix[3].xyz : vec3(0.0, 0.0, -1.0);
cam_ray = mat3(sphereMatrix) * cam_ray;
cam_ray = to_float3x3(sphereMatrix) * cam_ray;
/* Sphere center distance from the camera (persp) in local space. */
float cam_dist = length(cam_ray);


@@ -25,7 +25,7 @@ void main()
* In perspective mode it's also the view-space position
* of the sphere center. */
vec3 cam_ray = (is_persp) ? model_view_matrix[3].xyz : vec3(0.0, 0.0, -1.0);
cam_ray = mat3(sphereMatrix) * cam_ray;
cam_ray = to_float3x3(sphereMatrix) * cam_ray;
/* Sphere center distance from the camera (persp) in local space. */
float cam_dist = length(cam_ray);


@@ -11,10 +11,10 @@ float dither(void)
{
/* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
* memory pressure. */
const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
const vec4 dither_mat4x4[4] = float4_array(vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
ivec2 co = ivec2(gl_FragCoord.xy) % 4;
return dither_mat4x4[co.x][co.y];


@@ -8,7 +8,7 @@
void main()
{
mat3 imat = mat3(ModelMatrixInverse);
mat3 imat = to_float3x3(ModelMatrixInverse);
vec3 right = normalize(imat * ViewMatrixInverse[0].xyz);
vec3 up = normalize(imat * ViewMatrixInverse[1].xyz);
#ifdef VERTEX_PULL


@@ -61,7 +61,7 @@ void main()
thick_time = thickness * (thick_time * 2.0 - 1.0);
/* Take object scale into account.
* NOTE: This only works fine with uniform scaling. */
float scale = 1.0 / length(mat3(ModelMatrixInverse) * binor);
float scale = 1.0 / length(to_float3x3(ModelMatrixInverse) * binor);
world_pos = center_wpos + binor * thick_time * scale;
}
else {


@@ -80,7 +80,7 @@ void main()
/* Weights are available */
else {
float weight = weight_interp.x;
vec4 weight_color = texture(colorramp, weight, 0);
vec4 weight_color = texture(colorramp, weight);
weight_color = apply_color_fac(weight_color);
/* Contour display */


@@ -87,14 +87,14 @@ void main()
#endif
/* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
* memory pressure. */
const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0);
const int indices[8] = int_array(0, 1, 1, 2, 2, 3, 3, 0);
/* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid
* overlapping of the outlines. */
const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0),
vec3(0.45, 0.45, 0.0),
vec3(0.45, -0.45, 0.0),
vec3(-0.45, -0.45, 0.0));
const vec3 corners[4] = float3_array(vec3(-0.45, 0.45, 0.0),
vec3(0.45, 0.45, 0.0),
vec3(0.45, -0.45, 0.0),
vec3(-0.45, -0.45, 0.0));
vec3 pos = domainOriginOffset + cellSize * (vec3(cell_co + adaptiveCellOffset) + cell_offset);
vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 8]];


@@ -178,12 +178,12 @@ void main()
# ifdef USE_NEEDLE
/* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
* memory pressure. */
const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5),
vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
vec3(0.0, 0.0, 0.5));
const vec3 corners[4] = float3_array(vec3(0.0, 0.2, -0.5),
vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
vec3(0.0, 0.0, 0.5));
const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
const int indices[12] = int_array(0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 12]];
pos += rotated_pos * vector_length * displaySize * cellSize;


@@ -100,7 +100,7 @@ void main()
if (isHair) {
mat4 obmat = hairDupliMatrix;
wpos = (obmat * vec4(pos, 1.0)).xyz;
wnor = -normalize(mat3(obmat) * nor);
wnor = -normalize(to_float3x3(obmat) * nor);
}
bool is_persp = (drw_view.winmat[3][3] == 0.0);


@@ -190,10 +190,10 @@ vec4 volume_integration(vec3 ray_ori, vec3 ray_dir, float ray_inc, float ray_max
{
/* NOTE: Constant array declared inside function scope to reduce shader core thread memory
* pressure on Apple Silicon. */
const vec4 dither_mat[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
const vec4 dither_mat[4] = float4_array(vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
/* Start with full transmittance and no scattered light. */
vec3 final_scattering = vec3(0.0);
float final_transmittance = 1.0;


@@ -176,7 +176,7 @@ void drw_debug_sphere(vec3 p, float radius)
*/
void drw_debug_matrix(mat4 mat, vec4 v_color)
{
vec4 p[4] = vec4[4](vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1));
vec4 p[4] = float4_array(vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1));
for (int i = 0; i < 4; i++) {
p[i] = mat * p[i];
p[i].xyz /= p[i].w;
@@ -195,14 +195,14 @@ void drw_debug_matrix(mat4 mat)
*/
void drw_debug_matrix_as_bbox(mat4 mat, vec4 v_color)
{
vec4 p[8] = vec4[8](vec4(-1, -1, -1, 1),
vec4(1, -1, -1, 1),
vec4(1, 1, -1, 1),
vec4(-1, 1, -1, 1),
vec4(-1, -1, 1, 1),
vec4(1, -1, 1, 1),
vec4(1, 1, 1, 1),
vec4(-1, 1, 1, 1));
vec4 p[8] = float4_array(vec4(-1, -1, -1, 1),
vec4(1, -1, -1, 1),
vec4(1, 1, -1, 1),
vec4(-1, 1, -1, 1),
vec4(-1, -1, 1, 1),
vec4(1, -1, 1, 1),
vec4(1, 1, 1, 1),
vec4(-1, 1, 1, 1));
for (int i = 0; i < 8; i++) {
p[i] = mat * p[i];
p[i].xyz /= p[i].w;


@@ -124,7 +124,7 @@ void drw_print_value_binary(uint value)
{
drw_print_no_endl("0b");
drw_print_string_start(10u * 4u);
uint digits[10] = uint[10](0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
uint digits[10] = uint_array(0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
uint digit = 0u;
for (uint i = 0u; i < 32u; i++) {
drw_print_append_digit(((value >> i) & 1u), digits[digit / 4u]);
@@ -155,7 +155,7 @@ void drw_print_value_uint(uint value, const bool hex, bool is_negative, const bo
drw_print_string_start(3u * 4u);
const uint blank_value = hex ? 0x30303030u : 0x20202020u;
const uint prefix = hex ? 0x78302020u : 0x20202020u;
uint digits[3] = uint[3](blank_value, blank_value, prefix);
uint digits[3] = uint_array(blank_value, blank_value, prefix);
const uint base = hex ? 16u : 10u;
uint digit = 0u;
/* Add `u` suffix. */
@@ -247,7 +247,7 @@ void drw_print_value(float val)
* and exponent (4). */
const float significant_digits = 6.0;
drw_print_string_start(3u * 4u);
uint digits[3] = uint[3](0x20202020u, 0x20202020u, 0x20202020u);
uint digits[3] = uint_array(0x20202020u, 0x20202020u, 0x20202020u);
float exponent = floor(log(abs(val)) / log(10.0));
bool display_exponent = exponent >= (significant_digits) ||


@@ -210,7 +210,7 @@ void hair_get_center_pos_tan_binor_time(bool is_persp,
mat4 obmat = hairDupliMatrix;
wpos = (obmat * vec4(wpos, 1.0)).xyz;
wtan = -normalize(mat3(obmat) * wtan);
wtan = -normalize(to_float3x3(obmat) * wtan);
vec3 camera_vec = (is_persp) ? camera_pos - wpos : camera_z;
wbinor = normalize(cross(camera_vec, wtan));
@@ -236,7 +236,7 @@ void hair_get_pos_tan_binor_time(bool is_persp,
thick_time = thickness * (thick_time * 2.0 - 1.0);
/* Take object scale into account.
* NOTE: This only works fine with uniform scaling. */
float scale = 1.0 / length(mat3(invmodel_mat) * wbinor);
float scale = 1.0 / length(to_float3x3(invmodel_mat) * wbinor);
wpos += wbinor * thick_time * scale;
}
else {


@@ -41,7 +41,7 @@ mat3 mul(mat3 m1, mat3 m2)
# ifndef GPU_SHADER_MATH_MATRIX_LIB_GLSL
vec3 transform_direction(mat4 m, vec3 v)
{
return mat3(m) * v;
return to_float3x3(m) * v;
}
vec3 transform_point(mat4 m, vec3 v)
{


@@ -37,7 +37,7 @@ void pointcloud_get_pos_and_radius(out vec3 outpos, out float outradius)
int id = pointcloud_get_point_id();
vec4 pos_rad = texelFetch(ptcloud_pos_rad_tx, id);
outpos = point_object_to_world(pos_rad.xyz);
outradius = dot(abs(mat3(ModelMatrix) * pos_rad.www), vec3(1.0 / 3.0));
outradius = dot(abs(to_float3x3(ModelMatrix) * pos_rad.www), vec3(1.0 / 3.0));
}
/* Return world position and normal. */


@@ -227,14 +227,14 @@ uniform mat4 ModelMatrixInverse;
* NOTE: This is only valid because we are only using the mat3 of the ViewMatrixInverse.
* ViewMatrix * transpose(ModelMatrixInverse)
*/
# define NormalMatrix transpose(mat3(ModelMatrixInverse))
# define NormalMatrixInverse transpose(mat3(ModelMatrix))
# define NormalMatrix transpose(to_float3x3(ModelMatrixInverse))
# define NormalMatrixInverse transpose(to_float3x3(ModelMatrix))
# define normal_object_to_view(n) (mat3(ViewMatrix) * (NormalMatrix * n))
# define normal_object_to_view(n) (to_float3x3(ViewMatrix) * (NormalMatrix * n))
# define normal_object_to_world(n) (NormalMatrix * n)
# define normal_world_to_object(n) (NormalMatrixInverse * n)
# define normal_world_to_view(n) (mat3(ViewMatrix) * n)
# define normal_view_to_world(n) (mat3(ViewMatrixInverse) * n)
# define normal_world_to_view(n) (to_float3x3(ViewMatrix) * n)
# define normal_view_to_world(n) (to_float3x3(ViewMatrixInverse) * n)
# define point_object_to_ndc(p) \
(ProjectionMatrix * (ViewMatrix * vec4((ModelMatrix * vec4(p, 1.0)).xyz, 1.0)))


@@ -9,102 +9,102 @@
bool char_intersect(uvec2 bitmap_position)
{
/* Using 8x8 = 64bits = uvec2. */
uvec2 ascii_bitmap[96] = uvec2[96](uvec2(0x00000000u, 0x00000000u),
uvec2(0x18001800u, 0x183c3c18u),
uvec2(0x00000000u, 0x36360000u),
uvec2(0x7f363600u, 0x36367f36u),
uvec2(0x301f0c00u, 0x0c3e031eu),
uvec2(0x0c666300u, 0x00633318u),
uvec2(0x3b336e00u, 0x1c361c6eu),
uvec2(0x00000000u, 0x06060300u),
uvec2(0x060c1800u, 0x180c0606u),
uvec2(0x180c0600u, 0x060c1818u),
uvec2(0x3c660000u, 0x00663cffu),
uvec2(0x0c0c0000u, 0x000c0c3fu),
uvec2(0x000c0c06u, 0x00000000u),
uvec2(0x00000000u, 0x0000003fu),
uvec2(0x000c0c00u, 0x00000000u),
uvec2(0x06030100u, 0x6030180cu),
uvec2(0x6f673e00u, 0x3e63737bu),
uvec2(0x0c0c3f00u, 0x0c0e0c0cu),
uvec2(0x06333f00u, 0x1e33301cu),
uvec2(0x30331e00u, 0x1e33301cu),
uvec2(0x7f307800u, 0x383c3633u),
uvec2(0x30331e00u, 0x3f031f30u),
uvec2(0x33331e00u, 0x1c06031fu),
uvec2(0x0c0c0c00u, 0x3f333018u),
uvec2(0x33331e00u, 0x1e33331eu),
uvec2(0x30180e00u, 0x1e33333eu),
uvec2(0x000c0c00u, 0x000c0c00u),
uvec2(0x000c0c06u, 0x000c0c00u),
uvec2(0x060c1800u, 0x180c0603u),
uvec2(0x003f0000u, 0x00003f00u),
uvec2(0x180c0600u, 0x060c1830u),
uvec2(0x0c000c00u, 0x1e333018u),
uvec2(0x7b031e00u, 0x3e637b7bu),
uvec2(0x3f333300u, 0x0c1e3333u),
uvec2(0x66663f00u, 0x3f66663eu),
uvec2(0x03663c00u, 0x3c660303u),
uvec2(0x66361f00u, 0x1f366666u),
uvec2(0x16467f00u, 0x7f46161eu),
uvec2(0x16060f00u, 0x7f46161eu),
uvec2(0x73667c00u, 0x3c660303u),
uvec2(0x33333300u, 0x3333333fu),
uvec2(0x0c0c1e00u, 0x1e0c0c0cu),
uvec2(0x33331e00u, 0x78303030u),
uvec2(0x36666700u, 0x6766361eu),
uvec2(0x46667f00u, 0x0f060606u),
uvec2(0x6b636300u, 0x63777f7fu),
uvec2(0x73636300u, 0x63676f7bu),
uvec2(0x63361c00u, 0x1c366363u),
uvec2(0x06060f00u, 0x3f66663eu),
uvec2(0x3b1e3800u, 0x1e333333u),
uvec2(0x36666700u, 0x3f66663eu),
uvec2(0x38331e00u, 0x1e33070eu),
uvec2(0x0c0c1e00u, 0x3f2d0c0cu),
uvec2(0x33333f00u, 0x33333333u),
uvec2(0x331e0c00u, 0x33333333u),
uvec2(0x7f776300u, 0x6363636bu),
uvec2(0x1c366300u, 0x6363361cu),
uvec2(0x0c0c1e00u, 0x3333331eu),
uvec2(0x4c667f00u, 0x7f633118u),
uvec2(0x06061e00u, 0x1e060606u),
uvec2(0x30604000u, 0x03060c18u),
uvec2(0x18181e00u, 0x1e181818u),
uvec2(0x00000000u, 0x081c3663u),
uvec2(0x000000ffu, 0x00000000u),
uvec2(0x00000000u, 0x0c0c1800u),
uvec2(0x3e336e00u, 0x00001e30u),
uvec2(0x66663b00u, 0x0706063eu),
uvec2(0x03331e00u, 0x00001e33u),
uvec2(0x33336e00u, 0x3830303eu),
uvec2(0x3f031e00u, 0x00001e33u),
uvec2(0x06060f00u, 0x1c36060fu),
uvec2(0x333e301fu, 0x00006e33u),
uvec2(0x66666700u, 0x0706366eu),
uvec2(0x0c0c1e00u, 0x0c000e0cu),
uvec2(0x3033331eu, 0x30003030u),
uvec2(0x1e366700u, 0x07066636u),
uvec2(0x0c0c1e00u, 0x0e0c0c0cu),
uvec2(0x7f6b6300u, 0x0000337fu),
uvec2(0x33333300u, 0x00001f33u),
uvec2(0x33331e00u, 0x00001e33u),
uvec2(0x663e060fu, 0x00003b66u),
uvec2(0x333e3078u, 0x00006e33u),
uvec2(0x66060f00u, 0x00003b6eu),
uvec2(0x1e301f00u, 0x00003e03u),
uvec2(0x0c2c1800u, 0x080c3e0cu),
uvec2(0x33336e00u, 0x00003333u),
uvec2(0x331e0c00u, 0x00003333u),
uvec2(0x7f7f3600u, 0x0000636bu),
uvec2(0x1c366300u, 0x00006336u),
uvec2(0x333e301fu, 0x00003333u),
uvec2(0x0c263f00u, 0x00003f19u),
uvec2(0x0c0c3800u, 0x380c0c07u),
uvec2(0x18181800u, 0x18181800u),
uvec2(0x0c0c0700u, 0x070c0c38u),
uvec2(0x00000000u, 0x6e3b0000u),
uvec2(0x00000000u, 0x00000000u));
uvec2 ascii_bitmap[96] = uint2_array(uvec2(0x00000000u, 0x00000000u),
uvec2(0x18001800u, 0x183c3c18u),
uvec2(0x00000000u, 0x36360000u),
uvec2(0x7f363600u, 0x36367f36u),
uvec2(0x301f0c00u, 0x0c3e031eu),
uvec2(0x0c666300u, 0x00633318u),
uvec2(0x3b336e00u, 0x1c361c6eu),
uvec2(0x00000000u, 0x06060300u),
uvec2(0x060c1800u, 0x180c0606u),
uvec2(0x180c0600u, 0x060c1818u),
uvec2(0x3c660000u, 0x00663cffu),
uvec2(0x0c0c0000u, 0x000c0c3fu),
uvec2(0x000c0c06u, 0x00000000u),
uvec2(0x00000000u, 0x0000003fu),
uvec2(0x000c0c00u, 0x00000000u),
uvec2(0x06030100u, 0x6030180cu),
uvec2(0x6f673e00u, 0x3e63737bu),
uvec2(0x0c0c3f00u, 0x0c0e0c0cu),
uvec2(0x06333f00u, 0x1e33301cu),
uvec2(0x30331e00u, 0x1e33301cu),
uvec2(0x7f307800u, 0x383c3633u),
uvec2(0x30331e00u, 0x3f031f30u),
uvec2(0x33331e00u, 0x1c06031fu),
uvec2(0x0c0c0c00u, 0x3f333018u),
uvec2(0x33331e00u, 0x1e33331eu),
uvec2(0x30180e00u, 0x1e33333eu),
uvec2(0x000c0c00u, 0x000c0c00u),
uvec2(0x000c0c06u, 0x000c0c00u),
uvec2(0x060c1800u, 0x180c0603u),
uvec2(0x003f0000u, 0x00003f00u),
uvec2(0x180c0600u, 0x060c1830u),
uvec2(0x0c000c00u, 0x1e333018u),
uvec2(0x7b031e00u, 0x3e637b7bu),
uvec2(0x3f333300u, 0x0c1e3333u),
uvec2(0x66663f00u, 0x3f66663eu),
uvec2(0x03663c00u, 0x3c660303u),
uvec2(0x66361f00u, 0x1f366666u),
uvec2(0x16467f00u, 0x7f46161eu),
uvec2(0x16060f00u, 0x7f46161eu),
uvec2(0x73667c00u, 0x3c660303u),
uvec2(0x33333300u, 0x3333333fu),
uvec2(0x0c0c1e00u, 0x1e0c0c0cu),
uvec2(0x33331e00u, 0x78303030u),
uvec2(0x36666700u, 0x6766361eu),
uvec2(0x46667f00u, 0x0f060606u),
uvec2(0x6b636300u, 0x63777f7fu),
uvec2(0x73636300u, 0x63676f7bu),
uvec2(0x63361c00u, 0x1c366363u),
uvec2(0x06060f00u, 0x3f66663eu),
uvec2(0x3b1e3800u, 0x1e333333u),
uvec2(0x36666700u, 0x3f66663eu),
uvec2(0x38331e00u, 0x1e33070eu),
uvec2(0x0c0c1e00u, 0x3f2d0c0cu),
uvec2(0x33333f00u, 0x33333333u),
uvec2(0x331e0c00u, 0x33333333u),
uvec2(0x7f776300u, 0x6363636bu),
uvec2(0x1c366300u, 0x6363361cu),
uvec2(0x0c0c1e00u, 0x3333331eu),
uvec2(0x4c667f00u, 0x7f633118u),
uvec2(0x06061e00u, 0x1e060606u),
uvec2(0x30604000u, 0x03060c18u),
uvec2(0x18181e00u, 0x1e181818u),
uvec2(0x00000000u, 0x081c3663u),
uvec2(0x000000ffu, 0x00000000u),
uvec2(0x00000000u, 0x0c0c1800u),
uvec2(0x3e336e00u, 0x00001e30u),
uvec2(0x66663b00u, 0x0706063eu),
uvec2(0x03331e00u, 0x00001e33u),
uvec2(0x33336e00u, 0x3830303eu),
uvec2(0x3f031e00u, 0x00001e33u),
uvec2(0x06060f00u, 0x1c36060fu),
uvec2(0x333e301fu, 0x00006e33u),
uvec2(0x66666700u, 0x0706366eu),
uvec2(0x0c0c1e00u, 0x0c000e0cu),
uvec2(0x3033331eu, 0x30003030u),
uvec2(0x1e366700u, 0x07066636u),
uvec2(0x0c0c1e00u, 0x0e0c0c0cu),
uvec2(0x7f6b6300u, 0x0000337fu),
uvec2(0x33333300u, 0x00001f33u),
uvec2(0x33331e00u, 0x00001e33u),
uvec2(0x663e060fu, 0x00003b66u),
uvec2(0x333e3078u, 0x00006e33u),
uvec2(0x66060f00u, 0x00003b6eu),
uvec2(0x1e301f00u, 0x00003e03u),
uvec2(0x0c2c1800u, 0x080c3e0cu),
uvec2(0x33336e00u, 0x00003333u),
uvec2(0x331e0c00u, 0x00003333u),
uvec2(0x7f7f3600u, 0x0000636bu),
uvec2(0x1c366300u, 0x00006336u),
uvec2(0x333e301fu, 0x00003333u),
uvec2(0x0c263f00u, 0x00003f19u),
uvec2(0x0c0c3800u, 0x380c0c07u),
uvec2(0x18181800u, 0x18181800u),
uvec2(0x0c0c0700u, 0x070c0c38u),
uvec2(0x00000000u, 0x6e3b0000u),
uvec2(0x00000000u, 0x00000000u));
if (any(lessThan(bitmap_position, uvec2(0))) || any(greaterThanEqual(bitmap_position, uvec2(8))))
{


@@ -53,11 +53,11 @@ mat4x4 drw_modelinv()
*/
mat3x3 drw_normat()
{
return transpose(mat3x3(drw_modelinv()));
return transpose(to_float3x3(drw_modelinv()));
}
mat3x3 drw_norinv()
{
return transpose(mat3x3(drw_modelmat()));
return transpose(to_float3x3(drw_modelmat()));
}
/* -------------------------------------------------------------------- */
@@ -77,11 +77,11 @@ vec3 drw_normal_world_to_object(vec3 N)
vec3 drw_normal_object_to_view(vec3 lN)
{
return (mat3x3(drw_view.viewmat) * (drw_normat() * lN));
return (to_float3x3(drw_view.viewmat) * (drw_normat() * lN));
}
vec3 drw_normal_view_to_object(vec3 vN)
{
return (drw_norinv() * (mat3x3(drw_view.viewinv) * vN));
return (drw_norinv() * (to_float3x3(drw_view.viewinv) * vN));
}
/** \} */


@@ -108,12 +108,12 @@ float drw_ndc_to_screen(float ndc_P)
vec3 drw_normal_view_to_world(vec3 vN)
{
return (mat3x3(drw_view.viewinv) * vN);
return (to_float3x3(drw_view.viewinv) * vN);
}
vec3 drw_normal_world_to_view(vec3 N)
{
return (mat3x3(drw_view.viewmat) * N);
return (to_float3x3(drw_view.viewmat) * N);
}
/** \} */


@@ -41,6 +41,11 @@ struct GPUShader;
*/
GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info);
/**
* Same as GPU_shader_create_from_info but will run preprocessor on source strings.
*/
GPUShader *GPU_shader_create_from_info_python(const GPUShaderCreateInfo *_info);
/**
* Create a shader using a named #GPUShaderCreateInfo registered at startup.
* These are declared inside `*_info.hh` files using the `GPU_SHADER_CREATE_INFO()` macro.


@@ -7,6 +7,8 @@
# Build `glsl_preprocess` executable.
set(SRC
glsl_preprocess.cc
glsl_preprocess.hh
)
# `SRC_DNA_INC` is defined in the parent directory.


@@ -1,4 +1,4 @@
/* SPDX-FileCopyrightText: 2001-2002 NaN Holding BV. All rights reserved.
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
@@ -6,11 +6,12 @@
* \ingroup glsl_preprocess
*/
#include <algorithm>
#include <fstream>
#include <iostream>
#include <regex>
#include <string>
#include <vector>
#include "glsl_preprocess.hh"
int main(int argc, char **argv)
{
@@ -40,8 +41,29 @@ int main(int argc, char **argv)
bool first_comment = true;
bool inside_comment = false;
int error = 0;
size_t line_index = 0;
auto report_error =
[&](const std::string &src_line, const std::smatch &match, const char *err_msg) {
size_t err_line = line_index;
size_t err_char = match.position();
std::cerr << input_file_name;
std::cerr << ':' << std::to_string(err_line) << ':' << std::to_string(err_char);
std::cerr << ": error: " << err_msg << std::endl;
std::cerr << src_line << std::endl;
std::cerr << std::string(err_char, ' ') << '^' << std::endl;
error++;
};
blender::gpu::shader::Preprocessor processor(report_error);
std::string line;
while (std::getline(input_file, line)) {
line_index++;
/* Remove licence headers (first comment). */
if (line.rfind("/*", 0) == 0 && first_comment) {
first_comment = false;
@@ -55,17 +77,17 @@ int main(int argc, char **argv)
}
if (skip_line) {
line = "";
output_file << "\n";
}
else if (line.rfind("#include ", 0) == 0 || line.rfind("#pragma once", 0) == 0) {
line[0] = line[1] = '/';
else {
processor << line << '\n';
}
output_file << line << "\n";
}
output_file << processor.str();
input_file.close();
output_file.close();
return 0;
return error;
}


@@ -0,0 +1,219 @@
/* SPDX-FileCopyrightText: 2024 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup glsl_preprocess
*/
#pragma once
#include <algorithm>
#include <regex>
#include <sstream>
#include <string>
#include <vector>
namespace blender::gpu::shader {
/**
 * Shader source preprocessor that allows mutating GLSL into cross-API source that can be
 * interpreted by the different GPU backends. Some syntax is mutated or reported as incompatible.
 *
 * Implementation speed is not a huge concern as we only apply this at compile time or on Python
 * shader sources.
*/
template<typename T, bool no_linting = false> class Preprocessor {
T &report_error;
struct SharedVar {
std::string type;
std::string name;
std::string array;
};
std::vector<SharedVar> shared_vars_;
std::stringstream output_;
public:
Preprocessor(T &error_cb) : report_error(error_cb) {}
Preprocessor &operator<<(std::string str)
{
threadgroup_variable_parsing(str);
matrix_constructor_linting(str);
array_constructor_linting(str);
str = preprocessor_directive_mutation(str);
str = argument_decorator_macro_injection(str);
str = array_constructor_macro_injection(str);
output_ << str;
return *this;
}
Preprocessor &operator<<(char c)
{
output_ << c;
return *this;
}
std::string str()
{
return output_.str() + suffix();
}
private:
std::string preprocessor_directive_mutation(const std::string &str)
{
/* Example: `#include "deps.glsl"` > `//include "deps.glsl"` */
std::regex regex("#\\s*(include|pragma once)");
return std::regex_replace(str, regex, "//$1");
}
void threadgroup_variable_parsing(std::string str)
{
std::regex regex("shared\\s+(\\w+)\\s+(\\w+)([^;]*);");
for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
shared_vars_.push_back({match[1].str(), match[2].str(), match[3].str()});
}
}
std::string argument_decorator_macro_injection(const std::string &str)
{
/* Example: `out float var[2]` > `out float _out_sta var _out_end[2]` */
std::regex regex("(out|inout|in|shared)\\s+(\\w+)\\s+(\\w+)");
return std::regex_replace(str, regex, "$1 $2 _$1_sta $3 _$1_end");
}
std::string array_constructor_macro_injection(const std::string &str)
{
/* Example: `= float[2](0.0, 0.0)` > `= ARRAY_T(float) ARRAY_V(0.0, 0.0)` */
std::regex regex("=\\s*(\\w+)\\s*\\[[^\\]]*\\]\\s*\\(");
return std::regex_replace(str, regex, "= ARRAY_T($1) ARRAY_V(");
}
/* TODO(fclem): Too many false positive and false negative to be applied to python shaders. */
void matrix_constructor_linting(std::string str)
{
if constexpr (no_linting) {
return;
}
/* Example: `mat4(other_mat)`. */
std::regex regex("\\s+(mat(\\d|\\dx\\d)|float\\dx\\d)\\([^,\\s\\d]+\\)");
for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
/* This only catches some invalid usage. For the rest, the CI will catch them. */
const char *msg =
"Matrix constructor is not cross API compatible. "
"Use to_floatNxM to reshape the matrix or use other constructors instead.";
report_error(str, match, msg);
}
}
void array_constructor_linting(std::string str)
{
if constexpr (no_linting) {
return;
}
std::regex regex("=\\s*(\\w+)\\s*\\[[^\\]]*\\]\\s*\\(");
for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
/* This only catches some invalid usage. For the rest, the CI will catch them. */
const char *msg =
"Array constructor is not cross API compatible. Use type_array instead of type[].";
report_error(str, match, msg);
}
}
std::string suffix()
{
if (shared_vars_.empty()) {
return "";
}
std::stringstream suffix;
/**
* For Metal shaders to compile, shared (threadgroup) variable cannot be declared globally.
* They must reside within a function scope. Hence, we need to extract these declarations and
* generate shared memory blocks within the entry point function. These shared memory blocks
* can then be passed as references to the remaining shader via the class function scope.
*
* The shared variable definitions from the source file are replaced with references to
* threadgroup memory blocks (using _shared_sta and _shared_end macros), but kept in-line in
* case external macros are used to declare the dimensions.
*
* Each part of the codegen is stored inside macros so that we don't have to do string
* replacement at runtime.
*/
/* Arguments of the wrapper class constructor. */
suffix << "#undef MSL_SHARED_VARS_ARGS\n";
/* References assignment inside wrapper class constructor. */
suffix << "#undef MSL_SHARED_VARS_ASSIGN\n";
/* Declaration of threadgroup variables in entry point function. */
suffix << "#undef MSL_SHARED_VARS_DECLARE\n";
/* Arguments for wrapper class constructor call. */
suffix << "#undef MSL_SHARED_VARS_PASS\n";
/**
* Example replacement:
*
* `
* // Source
* shared float bar[10]; // Source declaration.
* shared float foo; // Source declaration.
* // Rest of the source ...
* // End of Source
*
* // Backend Output
* class Wrapper { // Added at runtime by backend.
*
* threadgroup float (&foo); // Replaced by regex and macros.
* threadgroup float (&bar)[10]; // Replaced by regex and macros.
* // Rest of the source ...
*
* Wrapper ( // Added at runtime by backend.
* threadgroup float (&_foo), threadgroup float (&_bar)[10] // MSL_SHARED_VARS_ARGS
* ) // Added at runtime by backend.
* : foo(_foo), bar(_bar) // MSL_SHARED_VARS_ASSIGN
* {} // Added at runtime by backend.
*
* }; // End of Wrapper // Added at runtime by backend.
*
* kernel entry_point() { // Added at runtime by backend.
*
* threadgroup float foo; // MSL_SHARED_VARS_DECLARE
* threadgroup float bar[10] // MSL_SHARED_VARS_DECLARE
*
* Wrapper wrapper // Added at runtime by backend.
* (foo, bar) // MSL_SHARED_VARS_PASS
* ; // Added at runtime by backend.
*
* } // Added at runtime by backend.
* // End of Backend Output
* `
*/
std::stringstream args, assign, declare, pass;
bool first = true;
for (SharedVar &var : shared_vars_) {
char sep = first ? ' ' : ',';
/* */
args << sep << "threadgroup " << var.type << "(&_" << var.name << ")" << var.array;
assign << (first ? ':' : ',') << var.name << "(_" << var.name << ")";
declare << "threadgroup " << var.type << ' ' << var.name << var.array << ";";
pass << sep << var.name;
first = false;
}
suffix << "#define MSL_SHARED_VARS_ARGS " << args.str() << "\n";
suffix << "#define MSL_SHARED_VARS_ASSIGN " << assign.str() << "\n";
suffix << "#define MSL_SHARED_VARS_DECLARE " << declare.str() << "\n";
suffix << "#define MSL_SHARED_VARS_PASS (" << pass.str() << ")\n";
return suffix.str();
}
};
template<typename T> class PreprocessorPython : public Preprocessor<T, true> {
public:
PreprocessorPython(T &error_cb) : Preprocessor<T, true>(error_cb){};
};
} // namespace blender::gpu::shader
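
As a rough usage sketch (mirroring the `main()` loop in `glsl_preprocess.cc` above and `preprocess_source()` in `gpu_shader.cc` further down), the template is fed line by line and the mutated source is read back with `str()`; the error callback receives the offending line, the regex match and a message. The function and file names below are illustrative, not part of the commit.

#include <iostream>
#include <regex>
#include <string>

#include "glsl_preprocess.hh"

/* Sketch: run one GLSL source through the preprocessor and report lint errors
 * in a "file:line:char: error: message" style similar to glsl_preprocess.cc. */
static std::string preprocess_glsl(const std::string &filename, std::istream &src)
{
  size_t line_index = 0;
  auto report_error = [&](const std::string &src_line, const std::smatch &match, const char *msg) {
    std::cerr << filename << ':' << line_index << ':' << match.position() << ": error: " << msg
              << std::endl
              << src_line << std::endl;
  };
  blender::gpu::shader::Preprocessor processor(report_error);
  std::string line;
  while (std::getline(src, line)) {
    line_index++;
    processor << line << '\n';
  }
  return processor.str();
}

The Python/runtime path goes through `PreprocessorPython`, which is the same class with the `no_linting` template parameter set, so the matrix and array constructor lints are skipped for user-supplied shader sources.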


@@ -17,6 +17,8 @@
#include "GPU_matrix.hh"
#include "GPU_platform.hh"
#include "glsl_preprocess/glsl_preprocess.hh"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "gpu_shader_create_info.hh"
@@ -299,6 +301,40 @@ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
return wrap(Context::get()->compiler->compile(info, false));
}
static std::string preprocess_source(StringRefNull original)
{
auto no_err_report = [](std::string, std::smatch, const char *) {};
gpu::shader::PreprocessorPython processor(no_err_report);
processor << std::string(original);
return processor.str();
};
GPUShader *GPU_shader_create_from_info_python(const GPUShaderCreateInfo *_info)
{
using namespace blender::gpu::shader;
ShaderCreateInfo &info = *const_cast<ShaderCreateInfo *>(
reinterpret_cast<const ShaderCreateInfo *>(_info));
std::string vertex_source_original = info.vertex_source_generated;
std::string fragment_source_original = info.fragment_source_generated;
std::string geometry_source_original = info.geometry_source_generated;
std::string compute_source_original = info.compute_source_generated;
info.vertex_source_generated = preprocess_source(info.vertex_source_generated);
info.fragment_source_generated = preprocess_source(info.fragment_source_generated);
info.geometry_source_generated = preprocess_source(info.geometry_source_generated);
info.compute_source_generated = preprocess_source(info.compute_source_generated);
GPUShader *result = wrap(Context::get()->compiler->compile(info, false));
info.vertex_source_generated = vertex_source_original;
info.fragment_source_generated = fragment_source_original;
info.geometry_source_generated = geometry_source_original;
info.compute_source_generated = compute_source_original;
return result;
}
GPUShader *GPU_shader_create_from_python(const char *vertcode,
const char *fragcode,
const char *geomcode,
@@ -315,6 +351,28 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode,
libcode = libcodecat = BLI_strdupcat(libcode, datatoc_gpu_shader_colorspace_lib_glsl);
}
std::string vertex_source_processed;
std::string fragment_source_processed;
std::string geometry_source_processed;
std::string library_source_processed;
if (vertcode != nullptr) {
vertex_source_processed = preprocess_source(vertcode);
vertcode = vertex_source_processed.c_str();
}
if (fragcode != nullptr) {
fragment_source_processed = preprocess_source(fragcode);
fragcode = fragment_source_processed.c_str();
}
if (geomcode != nullptr) {
geometry_source_processed = preprocess_source(geomcode);
geomcode = geometry_source_processed.c_str();
}
if (libcode != nullptr) {
library_source_processed = preprocess_source(libcode);
libcode = library_source_processed.c_str();
}
/* Use pyGPUShader as default name for shader. */
const char *shname = name != nullptr ? name : "pyGPUShader";


@@ -484,7 +484,13 @@ struct GPUSource {
int64_t keyword_cursor = 0;
out_qualifier = keyword_parse(arg, keyword_cursor);
out_type = keyword_parse(arg, keyword_cursor);
/* Skip qualifier prefix macro expanded by GLSL preprocessing (e.g. _out_sta). */
StringRef qualifier_prefix = keyword_parse(arg, keyword_cursor);
out_name = keyword_parse(arg, keyword_cursor);
if (out_qualifier == "const") {
out_name = qualifier_prefix;
}
if (out_name.is_empty()) {
/* No qualifier case. */
out_name = out_type;
@@ -1031,8 +1037,8 @@ struct GPUSource {
GPUSource *dependency_source = nullptr;
{
/* Include directive has been mangled on purpose. See `datatoc.cc`. */
pos = source.find("//nclude \"", pos + 1);
/* Include directive has been mangled on purpose. See `glsl_preprocess.hh`. */
pos = source.find("\n//include \"", pos + 1);
if (pos == -1) {
return 0;
}


@@ -426,8 +426,6 @@ class MSLGeneratorInterface {
blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings_tf;
/* Clip Distances. */
blender::Vector<char> clip_distances;
/* Shared Memory Blocks. */
blender::Vector<MSLSharedMemoryBlock> shared_memory_blocks;
/* Max bind IDs. */
int max_tex_bind_index = 0;
/** GL Global usage. */


@@ -182,26 +182,6 @@ static void remove_singleline_comments_func(std::string &str)
}
}
static bool is_program_word(const char *chr, int *len)
{
int numchars = 0;
for (const char *c = chr; *c != '\0'; c++) {
char ch = *c;
/* NOTE: Hash (`#`) is not valid in var names, but is used by Closure macro patterns. */
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
(numchars > 0 && ch >= '0' && ch <= '9') || ch == '_' || ch == '#')
{
numchars++;
}
else {
*len = numchars;
return (numchars > 0);
}
}
*len = numchars;
return true;
}
static int backwards_program_word_scan(const char *array_loc, const char *min)
{
const char *start;
@@ -267,130 +247,6 @@ static void extract_and_replace_clipping_distances(std::string &vertex_source,
}
}
/**
* Replace function parameter patterns containing:
* `out vec3 somevar` with `THD vec3&somevar`.
* which enables pass by reference via resolved macro:
* `thread vec3& somevar`.
*/
static void replace_outvars(std::string &str)
{
char *current_str_begin = &*str.begin();
char *current_str_end = &*str.end();
for (char *c = current_str_begin + 2; c < current_str_end - 6; c++) {
char *start = strstr(c, "out ");
if (start == nullptr) {
return;
}
else {
c = start;
if (strncmp(c - 2, "in", 2) == 0) {
start = c - 2;
}
/* Check that the following are words. */
int len1, len2;
char *word_base1 = c + 4;
char *word_base2 = word_base1;
if (is_program_word(word_base1, &len1) && (*(word_base1 + len1) == ' ')) {
word_base2 = word_base1 + len1 + 1;
if (is_program_word(word_base2, &len2)) {
/* Match found. */
bool is_array = (*(word_base2 + len2) == '[');
if (is_array) {
/* Generate out-variable pattern for arrays, of form
* `OUT(vec2,samples,CRYPTOMATTE_LEVELS_MAX)`
* replacing original `out vec2 samples[SAMPLE_LEN]`
* using 'OUT' macro declared in `mtl_shader_defines.msl`. */
char *array_end = strchr(word_base2 + len2, ']');
if (array_end != nullptr) {
*start = 'O';
*(start + 1) = 'U';
*(start + 2) = 'T';
*(start + 3) = '(';
for (char *clear = start + 4; clear < c + 4; clear++) {
*clear = ' ';
}
*(word_base2 - 1) = ',';
*(word_base2 + len2) = ',';
*array_end = ')';
}
}
else {
/* Generate out-variable pattern of form `THD type&var` from original `out vec4 var`.
*/
*start = 'T';
*(start + 1) = 'H';
*(start + 2) = 'D';
for (char *clear = start + 3; clear < c + 4; clear++) {
*clear = ' ';
}
*(word_base2 - 1) = '&';
}
}
}
}
}
}
static void replace_matrix_constructors(std::string &str)
{
/* Replace matrix constructors with GLSL-compatible constructors for Metal.
* Base matrix constructors e.g. mat3x3 do not have as many overload variants as GLSL.
* To add compatibility, we declare custom constructors e.g. MAT3x3 in `mtl_shader_defines.msl`.
* If the GLSL syntax matches, we map mat3x3(..) -> MAT3x3(..) and implement a custom
* constructor. This supports both mat3(..) and mat3x3(..) style syntax. */
char *current_str_begin = &*str.begin();
char *current_str_end = &*str.end();
for (char *c = current_str_begin; c < current_str_end - 10; c++) {
char *base_scan = strstr(c, "mat");
if (base_scan == nullptr) {
break;
}
/* Track end of constructor. */
char *constructor_end = nullptr;
/* check if next character is matrix dim. */
c = base_scan + 3;
if (!(*c == '2' || *c == '3' || *c == '4')) {
/* Not constructor, skip. */
continue;
}
/* Possible multiple dimensional matrix constructor. Verify if next char is a dim. */
c++;
if (*c == 'x') {
c++;
if (*c == '2' || *c == '3' || *c == '4') {
c++;
}
else {
/* Not matrix constructor, continue. */
continue;
}
}
/* Check for constructor opening brace. */
if (*c == '(') {
constructor_end = c;
}
else {
/* Not matrix constructor, continue. */
continue;
}
/* If is constructor, replace with MATN(..) syntax. */
if (constructor_end != nullptr) {
ARRAY_SET_ITEMS(base_scan, 'M', 'A', 'T');
continue;
}
}
}
static void replace_array_initializers_func(std::string &str)
{
char *current_str_begin = &*str.begin();
@@ -572,135 +428,6 @@ static bool extract_ssbo_pragma_info(const MTLShader *shader,
return false;
}
/* Extract shared memory declaration and their parameters.
* Inserts extracted cases as entries in MSLGeneratorInterface's shared memory block
* list. These will later be used to generate shared memory declarations within the entry point.
*
* TODO(Metal/GPU): Move shared memory declarations to GPUShaderCreateInfo. This is currently a
* necessary workaround to match GLSL functionality and enable full compute shader support. In the
* long term, best to avoid needing to perform this operation. */
void extract_shared_memory_blocks(MSLGeneratorInterface &msl_iface,
std::string &glsl_compute_source)
{
msl_iface.shared_memory_blocks.clear();
char *current_str_begin = &*glsl_compute_source.begin();
char *current_str_end = &*glsl_compute_source.end();
for (char *c = current_str_begin; c < current_str_end - 6; c++) {
/* Find first instance of "shared ". */
char *c_expr_start = strstr(c, "shared ");
if (c_expr_start == nullptr) {
break;
}
/* Check if "shared" was part of a previous word. If so, this is not valid. */
if (next_word_in_range(c_expr_start - 1, c_expr_start) != nullptr) {
c += 7; /* Jump forward by length of "shared ". */
continue;
}
/* Jump to shared declaration and detect end of statement. */
c = c_expr_start;
char *c_expr_end = strstr(c, ";");
if (c_expr_end == nullptr) {
break;
}
/* Prepare MSLSharedMemoryBlock instance. */
MSLSharedMemoryBlock new_shared_block;
char buf[256];
/* Read type-name. */
c += 7; /* Jump forward by length of "shared ". */
c = next_word_in_range(c, c_expr_end);
if (c == nullptr) {
c = c_expr_end + 1;
continue;
}
char *c_next_space = next_symbol_in_range(c, c_expr_end, ' ');
if (c_next_space == nullptr) {
c = c_expr_end + 1;
continue;
}
int len = c_next_space - c;
BLI_assert(len < 256);
BLI_strncpy(buf, c, len + 1);
new_shared_block.type_name = std::string(buf);
/* Read var-name.
* `varname` can either come right before the final semi-colon, or
* with following array syntax.
* spaces may exist before closing symbol. */
c = c_next_space + 1;
c = next_word_in_range(c, c_expr_end);
if (c == nullptr) {
c = c_expr_end + 1;
continue;
}
char *c_array_begin = next_symbol_in_range(c, c_expr_end, '[');
c_next_space = next_symbol_in_range(c, c_expr_end, ' ');
char *varname_end = nullptr;
if (c_array_begin != nullptr) {
/* Array path. */
if (c_next_space != nullptr) {
varname_end = (c_next_space < c_array_begin) ? c_next_space : c_array_begin;
}
else {
varname_end = c_array_begin;
}
new_shared_block.is_array = true;
}
else {
/* Ending semi-colon. */
if (c_next_space != nullptr) {
varname_end = (c_next_space < c_expr_end) ? c_next_space : c_expr_end;
}
else {
varname_end = c_expr_end;
}
new_shared_block.is_array = false;
}
len = varname_end - c;
BLI_assert(len < 256);
BLI_strncpy(buf, c, len + 1);
new_shared_block.varname = std::string(buf);
/* Determine if array. */
if (new_shared_block.is_array) {
int len = c_expr_end - c_array_begin;
BLI_strncpy(buf, c_array_begin, len + 1);
new_shared_block.array_decl = std::string(buf);
}
/* Shared block is valid, add it to the list and replace declaration with class member.
* reference. This declaration needs to have one of the formats:
* TG int& varname;
* TG int (&varname)[len][len]
*
* In order to fit in the same space, replace `threadgroup` with `TG` macro.
*/
for (char *c = c_expr_start; c <= c_expr_end; c++) {
*c = ' ';
}
std::string out_str = "TG ";
out_str += new_shared_block.type_name;
out_str += (new_shared_block.is_array) ? "(&" : "&";
out_str += new_shared_block.varname;
if (new_shared_block.is_array) {
out_str += ")" + new_shared_block.array_decl;
}
out_str += ";;";
memcpy(c_expr_start, out_str.c_str(), (out_str.length() - 1) * sizeof(char));
/* Jump to end of statement. */
c = c_expr_end + 1;
msl_iface.shared_memory_blocks.append(new_shared_block);
}
}
/** \} */
/* -------------------------------------------------------------------- */
@@ -973,11 +700,9 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
}
/* Special condition - mat3 and array constructor replacement. */
replace_matrix_constructors(shd_builder_->glsl_vertex_source_);
replace_array_initializers_func(shd_builder_->glsl_vertex_source_);
if (!msl_iface.uses_transform_feedback) {
replace_matrix_constructors(shd_builder_->glsl_fragment_source_);
replace_array_initializers_func(shd_builder_->glsl_fragment_source_);
}
@@ -1058,12 +783,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
/* Extract gl_ClipDistances. */
extract_and_replace_clipping_distances(shd_builder_->glsl_vertex_source_, msl_iface);
/* Replace 'out' attribute on function parameters with pass-by-reference. */
replace_outvars(shd_builder_->glsl_vertex_source_);
if (!msl_iface.uses_transform_feedback) {
replace_outvars(shd_builder_->glsl_fragment_source_);
}
/**** METAL Shader source generation. ****/
/* Setup `stringstream` for populating generated MSL shader vertex/frag shaders. */
std::stringstream ss_vertex;
@@ -1252,7 +971,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
/* Add Texture members.
* These members pack both a texture and a sampler into a single
* struct, as both are needed within texture functions.
* e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
* e.g. `_mtl_sampler_2d<float, access::read>`
* The exact typename is generated inside `get_msl_typestring_wrapper()`. */
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::VERTEX)) {
@@ -1485,7 +1204,6 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
BLI_assert(shd_builder_->glsl_compute_source_.size() > 0);
/*** Source cleanup. ***/
replace_matrix_constructors(shd_builder_->glsl_compute_source_);
replace_array_initializers_func(shd_builder_->glsl_compute_source_);
/**** Extract usage of GL globals. ****/
@@ -1526,19 +1244,6 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
remove_multiline_comments_func(shd_builder_->glsl_compute_source_);
remove_singleline_comments_func(shd_builder_->glsl_compute_source_);
/** Extract usage of shared memory.
* For Metal shaders to compile, shared (threadgroup) memory cannot be declared globally.
* It must reside within a function scope. Hence, we need to extract these uses and generate
* shared memory blocks within the entry point function, which can then be passed as references
* to the remaining shader via the class function scope.
*
* The existing block definitions are then replaced with references to threadgroup memory blocks,
* but kept in-line in case external macros are used to declare the dimensions. */
extract_shared_memory_blocks(msl_iface, shd_builder_->glsl_compute_source_);
/* Replace 'out' attribute on function parameters with pass-by-reference. */
replace_outvars(shd_builder_->glsl_compute_source_);
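/* Illustrative sketch of the mechanism described above (not code from this patch): a declaration
* such as `shared float cache[64];` cannot stay at global scope in MSL. After extraction it
* conceptually becomes:
*
*   kernel void entry(...) {
*     threadgroup float cache[64];   // Declared in entry-point scope.
*     StageClass inst(cache);        // Passed by reference into the stage class instance.
*     ...
*   }
*
* while the original global declaration is rewritten into a `TG float (&cache)[64];` class member
* reference, as shown by the extraction code earlier in this file. `StageClass` is a placeholder
* name. */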
/** Generate Compute shader stage. **/
std::stringstream ss_compute;
ss_compute << "#line 1 \"msl_wrapper_code\"\n";
@@ -1599,7 +1304,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
/* Add Texture members.
* These members pack both a texture and a sampler into a single
* struct, as both are needed within texture functions.
* e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
* e.g. `_mtl_sampler_2d<float, access::read>`
* The exact typename is generated inside `get_msl_typestring_wrapper()`. */
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::COMPUTE)) {
@@ -1631,37 +1336,8 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
/* Compute constructor for Shared memory blocks, as we must pass
* local references from entry-point function scope into the class
* instantiation. */
ss_compute << get_stage_class_name(ShaderStage::COMPUTE) << "(";
bool first = true;
if (msl_iface.shared_memory_blocks.size() > 0) {
for (const MSLSharedMemoryBlock &block : msl_iface.shared_memory_blocks) {
if (!first) {
ss_compute << ",";
}
if (block.is_array) {
ss_compute << "TG " << block.type_name << " (&_" << block.varname << ")"
<< block.array_decl;
}
else {
ss_compute << "TG " << block.type_name << " &_" << block.varname;
}
ss_compute << std::endl;
first = false;
}
ss_compute << ") : ";
first = true;
for (const MSLSharedMemoryBlock &block : msl_iface.shared_memory_blocks) {
if (!first) {
ss_compute << ",";
}
ss_compute << block.varname << "(_" << block.varname << ")";
first = false;
}
}
else {
ss_compute << ") ";
}
ss_compute << "{ }" << std::endl;
ss_compute << get_stage_class_name(ShaderStage::COMPUTE)
<< "(MSL_SHARED_VARS_ARGS) MSL_SHARED_VARS_ASSIGN {}\n";
/* Class Closing Bracket to end shader global scope. */
ss_compute << "};" << std::endl;
@@ -2440,28 +2116,9 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()
out << this->generate_msl_compute_inputs_string();
out << ") {" << std::endl << std::endl;
/* Generate Compute shader instance constructor. If shared memory blocks are used,
* these must be declared and then passed into the constructor. */
std::string stage_instance_constructor = "";
bool first = true;
if (shared_memory_blocks.size() > 0) {
stage_instance_constructor += "(";
for (const MSLSharedMemoryBlock &block : shared_memory_blocks) {
if (block.is_array) {
out << "TG " << block.type_name << " " << block.varname << block.array_decl << ";";
}
else {
out << "TG " << block.type_name << " " << block.varname << ";";
}
stage_instance_constructor += ((!first) ? "," : "") + block.varname;
first = false;
out << std::endl;
}
stage_instance_constructor += ")";
}
out << "MSL_SHARED_VARS_DECLARE\n";
out << "\t" << get_stage_class_name(ShaderStage::COMPUTE) << " " << shader_stage_inst_name
<< stage_instance_constructor << ";" << std::endl;
<< " MSL_SHARED_VARS_PASS;\n";
/* Copy global variables. */
/* Entry point parameters for gl Globals. */
@@ -3584,9 +3241,10 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_
if (tex_buf_id != -1) {
MSLBufferBlock &ssbo = this->storage_blocks[tex_buf_id];
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
<< this->texture_samplers[i].name << ".buffer = " << ssbo.name << ";" << std::endl;
<< this->texture_samplers[i].name << ".atomic.buffer = " << ssbo.name << ";"
<< std::endl;
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
<< this->texture_samplers[i].name << ".aligned_width = uniforms->"
<< this->texture_samplers[i].name << ".atomic.aligned_width = uniforms->"
<< this->texture_samplers[i].name << "_metadata.w;" << std::endl;
/* Buffer-backed 2D Array and 3D texture types are not natively supported so texture size
@@ -3598,7 +3256,7 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_
ImageType::INT_3D_ATOMIC))
{
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
<< this->texture_samplers[i].name << ".texture_size = ushort3(uniforms->"
<< this->texture_samplers[i].name << ".atomic.texture_size = ushort3(uniforms->"
<< this->texture_samplers[i].name << "_metadata.xyz);" << std::endl;
}
}
@@ -4022,128 +3680,128 @@ std::string MSLTextureResource::get_msl_wrapper_type_str() const
/* Add Types as needed. */
switch (this->type) {
case ImageType::FLOAT_1D: {
return "_mtl_combined_image_sampler_1d";
return "_mtl_sampler_1d";
}
case ImageType::FLOAT_2D: {
return "_mtl_combined_image_sampler_2d";
return "_mtl_sampler_2d";
}
case ImageType::FLOAT_3D: {
return "_mtl_combined_image_sampler_3d";
return "_mtl_sampler_3d";
}
case ImageType::FLOAT_CUBE: {
return "_mtl_combined_image_sampler_cube";
return "_mtl_sampler_cube";
}
case ImageType::FLOAT_1D_ARRAY: {
return "_mtl_combined_image_sampler_1d_array";
return "_mtl_sampler_1d_array";
}
case ImageType::FLOAT_2D_ARRAY: {
return "_mtl_combined_image_sampler_2d_array";
return "_mtl_sampler_2d_array";
}
case ImageType::FLOAT_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_cube_array";
return "_mtl_sampler_cube_array";
}
case ImageType::FLOAT_BUFFER: {
return "_mtl_combined_image_sampler_buffer";
return "_mtl_sampler_buffer";
}
case ImageType::DEPTH_2D: {
return "_mtl_combined_image_sampler_depth_2d";
return "_mtl_sampler_depth_2d";
}
case ImageType::SHADOW_2D: {
return "_mtl_combined_image_sampler_depth_2d";
return "_mtl_sampler_depth_2d";
}
case ImageType::DEPTH_2D_ARRAY: {
return "_mtl_combined_image_sampler_depth_2d_array";
return "_mtl_sampler_depth_2d_array";
}
case ImageType::SHADOW_2D_ARRAY: {
return "_mtl_combined_image_sampler_depth_2d_array";
return "_mtl_sampler_depth_2d_array";
}
case ImageType::DEPTH_CUBE: {
return "_mtl_combined_image_sampler_depth_cube";
return "_mtl_sampler_depth_cube";
}
case ImageType::SHADOW_CUBE: {
return "_mtl_combined_image_sampler_depth_cube";
return "_mtl_sampler_depth_cube";
}
case ImageType::DEPTH_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_depth_cube_array";
return "_mtl_sampler_depth_cube_array";
}
case ImageType::SHADOW_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_depth_cube_array";
return "_mtl_sampler_depth_cube_array";
}
case ImageType::INT_1D: {
return "_mtl_combined_image_sampler_1d";
return "_mtl_sampler_1d";
}
case ImageType::INT_2D: {
return "_mtl_combined_image_sampler_2d";
return "_mtl_sampler_2d";
}
case ImageType::INT_3D: {
return "_mtl_combined_image_sampler_3d";
return "_mtl_sampler_3d";
}
case ImageType::INT_CUBE: {
return "_mtl_combined_image_sampler_cube";
return "_mtl_sampler_cube";
}
case ImageType::INT_1D_ARRAY: {
return "_mtl_combined_image_sampler_1d_array";
return "_mtl_sampler_1d_array";
}
case ImageType::INT_2D_ARRAY: {
return "_mtl_combined_image_sampler_2d_array";
return "_mtl_sampler_2d_array";
}
case ImageType::INT_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_cube_array";
return "_mtl_sampler_cube_array";
}
case ImageType::INT_BUFFER: {
return "_mtl_combined_image_sampler_buffer";
return "_mtl_sampler_buffer";
}
case ImageType::UINT_1D: {
return "_mtl_combined_image_sampler_1d";
return "_mtl_sampler_1d";
}
case ImageType::UINT_2D: {
return "_mtl_combined_image_sampler_2d";
return "_mtl_sampler_2d";
}
case ImageType::UINT_3D: {
return "_mtl_combined_image_sampler_3d";
return "_mtl_sampler_3d";
}
case ImageType::UINT_CUBE: {
return "_mtl_combined_image_sampler_cube";
return "_mtl_sampler_cube";
}
case ImageType::UINT_1D_ARRAY: {
return "_mtl_combined_image_sampler_1d_array";
return "_mtl_sampler_1d_array";
}
case ImageType::UINT_2D_ARRAY: {
return "_mtl_combined_image_sampler_2d_array";
return "_mtl_sampler_2d_array";
}
case ImageType::UINT_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_cube_array";
return "_mtl_sampler_cube_array";
}
case ImageType::UINT_BUFFER: {
return "_mtl_combined_image_sampler_buffer";
return "_mtl_sampler_buffer";
}
/* If native texture atomics are unsupported, map types to fallback atomic structures which
* contain a buffer pointer and metadata members for size and alignment. */
case ImageType::INT_2D_ATOMIC:
case ImageType::UINT_2D_ATOMIC: {
if (supports_native_atomics) {
return "_mtl_combined_image_sampler_2d";
return "_mtl_sampler_2d";
}
else {
return "_mtl_combined_image_sampler_2d_atomic_fallback";
return "_mtl_sampler_2d_atomic";
}
}
case ImageType::INT_3D_ATOMIC:
case ImageType::UINT_3D_ATOMIC: {
if (supports_native_atomics) {
return "_mtl_combined_image_sampler_3d";
return "_mtl_sampler_3d";
}
else {
return "_mtl_combined_image_sampler_3d_atomic_fallback";
return "_mtl_sampler_3d_atomic";
}
}
case ImageType::INT_2D_ARRAY_ATOMIC:
case ImageType::UINT_2D_ARRAY_ATOMIC: {
if (supports_native_atomics) {
return "_mtl_combined_image_sampler_2d_array";
return "_mtl_sampler_2d_array";
}
else {
return "_mtl_combined_image_sampler_2d_array_atomic_fallback";
return "_mtl_sampler_2d_array_atomic";
}
}
default: {

View File

@@ -427,24 +427,6 @@ static void detect_workarounds()
{
GLContext::unused_fb_slot_workaround = true;
}
/* dFdx/dFdy calculation factors, those are dependent on driver. */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY) && strstr(version, "3.3.10750"))
{
GLContext::derivative_signs[0] = 1.0;
GLContext::derivative_signs[1] = -1.0;
}
else if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_WIN, GPU_DRIVER_ANY)) {
if (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
strstr(version, "4.0.0 - Build 9.18.10.3186") ||
strstr(version, "4.0.0 - Build 9.18.10.3165") ||
strstr(version, "3.1.0 - Build 9.17.10.3347") ||
strstr(version, "3.1.0 - Build 9.17.10.4101") ||
strstr(version, "3.3.0 - Build 8.15.10.2618"))
{
GLContext::derivative_signs[0] = -1.0;
GLContext::derivative_signs[1] = 1.0;
}
}
/* Draw shader parameters are broken on Qualcomm Windows ARM64 devices
* on Mesa version < 24.0.0 */
@@ -560,7 +542,6 @@ bool GLContext::texture_filter_anisotropic_support = false;
bool GLContext::debug_layer_workaround = false;
bool GLContext::unused_fb_slot_workaround = false;
bool GLContext::generate_mipmap_workaround = false;
float GLContext::derivative_signs[2] = {1.0f, 1.0f};
void GLBackend::capabilities_init()
{

View File

@@ -67,7 +67,6 @@ class GLContext : public Context {
static bool debug_layer_workaround;
static bool unused_fb_slot_workaround;
static bool generate_mipmap_workaround;
static float derivative_signs[2];
/** VBO for missing vertex attribute binding. Avoid undefined behavior on some implementation. */
GLuint default_attr_vbo_;

View File

@@ -1059,10 +1059,6 @@ static const char *glsl_patch_default_get()
/* Array compatibility. */
ss << "#define gpu_Array(_type) _type[]\n";
/* Derivative sign can change depending on implementation. */
ss << "#define DFDX_SIGN " << std::setprecision(2) << GLContext::derivative_signs[0] << "\n";
ss << "#define DFDY_SIGN " << std::setprecision(2) << GLContext::derivative_signs[1] << "\n";
/* GLSL Backend Lib. */
ss << datatoc_glsl_shader_defines_glsl;

View File

@@ -803,7 +803,7 @@ mat3x3 rotate(mat3x3 mat, EulerXYZ rotation)
mat4x4 rotate(mat4x4 mat, AxisAngle rotation)
{
mat4x4 result = mat4x4(rotate(mat3x3(mat), rotation));
mat4x4 result = to_float4x4(rotate(to_float3x3(mat), rotation));
result[0][3] = mat[0][3];
result[1][3] = mat[1][3];
result[2][3] = mat[2][3];
@@ -815,7 +815,7 @@ mat4x4 rotate(mat4x4 mat, AxisAngle rotation)
}
mat4x4 rotate(mat4x4 mat, EulerXYZ rotation)
{
mat4x4 result = mat4x4(rotate(mat3x3(mat), rotation));
mat4x4 result = to_float4x4(rotate(to_float3x3(mat), rotation));
result[0][3] = mat[0][3];
result[1][3] = mat[1][3];
result[2][3] = mat[2][3];
@@ -992,38 +992,38 @@ mat3x3 from_rot_scale(AxisAngle rotation, vec3 scale)
mat4x4 from_loc_rot(vec3 location, EulerXYZ rotation)
{
mat4x4 ret = mat4x4(from_rotation(rotation));
mat4x4 ret = to_float4x4(from_rotation(rotation));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot(vec3 location, Quaternion rotation)
{
mat4x4 ret = mat4x4(from_rotation(rotation));
mat4x4 ret = to_float4x4(from_rotation(rotation));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot(vec3 location, AxisAngle rotation)
{
mat4x4 ret = mat4x4(from_rotation(rotation));
mat4x4 ret = to_float4x4(from_rotation(rotation));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot_scale(vec3 location, EulerXYZ rotation, vec3 scale)
{
mat4x4 ret = mat4x4(from_rot_scale(rotation, scale));
mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot_scale(vec3 location, Quaternion rotation, vec3 scale)
{
mat4x4 ret = mat4x4(from_rot_scale(rotation, scale));
mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot_scale(vec3 location, AxisAngle rotation, vec3 scale)
{
mat4x4 ret = mat4x4(from_rot_scale(rotation, scale));
mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale));
ret[3].xyz = location;
return ret;
}
@@ -1079,11 +1079,11 @@ EulerXYZ to_euler(mat3x3 mat, const bool normalized)
}
EulerXYZ to_euler(mat4x4 mat)
{
return to_euler(mat3(mat));
return to_euler(to_float3x3(mat));
}
EulerXYZ to_euler(mat4x4 mat, const bool normalized)
{
return to_euler(mat3(mat), normalized);
return to_euler(to_float3x3(mat), normalized);
}
Quaternion normalized_to_quat_fast(mat3 mat)
@@ -1193,11 +1193,11 @@ Quaternion to_quaternion(mat3x3 mat, const bool normalized)
}
Quaternion to_quaternion(mat4x4 mat)
{
return to_quaternion(mat3(mat));
return to_quaternion(to_float3x3(mat));
}
Quaternion to_quaternion(mat4x4 mat, const bool normalized)
{
return to_quaternion(mat3(mat), normalized);
return to_quaternion(to_float3x3(mat), normalized);
}
vec3 to_scale(mat3x3 mat)
@@ -1216,11 +1216,11 @@ vec3 to_scale(mat3x3 mat, const bool allow_negative_scale)
}
vec3 to_scale(mat4x4 mat)
{
return to_scale(mat3(mat));
return to_scale(to_float3x3(mat));
}
vec3 to_scale(mat4x4 mat, const bool allow_negative_scale)
{
return to_scale(mat3(mat), allow_negative_scale);
return to_scale(to_float3x3(mat), allow_negative_scale);
}
void to_rot_scale(mat3x3 mat, out EulerXYZ r_rotation, out vec3 r_scale)
@@ -1265,7 +1265,7 @@ void to_rot_scale(mat3x3 mat,
void to_loc_rot_scale(mat4x4 mat, out vec3 r_location, out EulerXYZ r_rotation, out vec3 r_scale)
{
r_location = mat[3].xyz;
to_rot_scale(mat3(mat), r_rotation, r_scale);
to_rot_scale(to_float3x3(mat), r_rotation, r_scale);
}
void to_loc_rot_scale(mat4x4 mat,
out vec3 r_location,
@@ -1274,12 +1274,12 @@ void to_loc_rot_scale(mat4x4 mat,
const bool allow_negative_scale)
{
r_location = mat[3].xyz;
to_rot_scale(mat3(mat), r_rotation, r_scale, allow_negative_scale);
to_rot_scale(to_float3x3(mat), r_rotation, r_scale, allow_negative_scale);
}
void to_loc_rot_scale(mat4x4 mat, out vec3 r_location, out Quaternion r_rotation, out vec3 r_scale)
{
r_location = mat[3].xyz;
to_rot_scale(mat3(mat), r_rotation, r_scale);
to_rot_scale(to_float3x3(mat), r_rotation, r_scale);
}
void to_loc_rot_scale(mat4x4 mat,
out vec3 r_location,
@@ -1288,7 +1288,7 @@ void to_loc_rot_scale(mat4x4 mat,
const bool allow_negative_scale)
{
r_location = mat[3].xyz;
to_rot_scale(mat3(mat), r_rotation, r_scale, allow_negative_scale);
to_rot_scale(to_float3x3(mat), r_rotation, r_scale, allow_negative_scale);
}
vec3 transform_point(mat3x3 mat, vec3 point)
@@ -1308,7 +1308,7 @@ vec3 transform_direction(mat3x3 mat, vec3 direction)
vec3 transform_direction(mat4x4 mat, vec3 direction)
{
return mat3x3(mat) * direction;
return to_float3x3(mat) * direction;
}
vec2 project_point(mat3x3 mat, vec2 point)
@@ -1423,7 +1423,7 @@ bool is_negative(mat3x3 mat)
}
bool is_negative(mat4x4 mat)
{
return is_negative(mat3x3(mat));
return is_negative(to_float3x3(mat));
}
bool is_equal(mat2x2 a, mat2x2 b, float epsilon)
@@ -1505,15 +1505,15 @@ bool is_uniformly_scaled(mat3x3 mat)
bool is_orthogonal(mat4x4 mat)
{
return is_orthogonal(mat3x3(mat));
return is_orthogonal(to_float3x3(mat));
}
bool is_orthonormal(mat4x4 mat)
{
return is_orthonormal(mat3x3(mat));
return is_orthonormal(to_float3x3(mat));
}
bool is_uniformly_scaled(mat4x4 mat)
{
return is_uniformly_scaled(mat3x3(mat));
return is_uniformly_scaled(to_float3x3(mat));
}
/* Returns true if each individual columns are unit scaled. Mainly for assert usage. */

View File

@@ -85,12 +85,15 @@ vec2 do_tria()
float size = (tria2) ? -tria2Size : tria1Size;
vec2 center = (tria2) ? tria2Center : tria1Center;
vec2 arrow_pos[4] = vec2[4](vec2(0.0, 0.6), vec2(0.6, 0.0), vec2(-0.6, 0.0), vec2(0.0, -0.6));
vec2 arrow_pos[4] = float2_array(
vec2(0.0, 0.6), vec2(0.6, 0.0), vec2(-0.6, 0.0), vec2(0.0, -0.6));
/* Rotated uv space by 45deg and mirrored. */
vec2 arrow_uvs[4] = vec2[4](vec2(0.0, 0.85), vec2(0.85, 0.85), vec2(0.0, 0.0), vec2(0.0, 0.85));
vec2 arrow_uvs[4] = float2_array(
vec2(0.0, 0.85), vec2(0.85, 0.85), vec2(0.0, 0.0), vec2(0.0, 0.85));
vec2 point_pos[4] = vec2[4](vec2(-1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, -1.0), vec2(1.0, 1.0));
vec2 point_uvs[4] = vec2[4](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 0.0), vec2(1.0, 1.0));
vec2 point_pos[4] = float2_array(
vec2(-1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, -1.0), vec2(1.0, 1.0));
vec2 point_uvs[4] = float2_array(vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 0.0), vec2(1.0, 1.0));
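/* Note: the `float_array`/`float2_array`/`float4_array` helpers used here come from the shared
* compatibility defines and are not defined in this diff; presumably they expand to a plain GLSL
* array constructor such as `vec2[](...)` on the GL side while mapping to a C++-compatible
* initializer for MSL. */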
/* We reuse the SDF round-box rendering of widget to render the tria shapes.
* This means we do clever tricks to position the rectangle the way we want using

View File

@@ -30,44 +30,44 @@ void main()
{
/* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
* memory pressure. */
const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
vec2(0.02, 0.805),
vec2(0.067, 0.617),
vec2(0.169, 0.45),
vec2(0.293, 0.293),
vec2(0.45, 0.169),
vec2(0.617, 0.076),
vec2(0.805, 0.02),
vec2(1.0, 0.0),
vec2(-1.0, 0.0),
vec2(-0.805, 0.02),
vec2(-0.617, 0.067),
vec2(-0.45, 0.169),
vec2(-0.293, 0.293),
vec2(-0.169, 0.45),
vec2(-0.076, 0.617),
vec2(-0.02, 0.805),
vec2(0.0, 1.0),
vec2(0.0, -1.0),
vec2(-0.02, -0.805),
vec2(-0.067, -0.617),
vec2(-0.169, -0.45),
vec2(-0.293, -0.293),
vec2(-0.45, -0.169),
vec2(-0.617, -0.076),
vec2(-0.805, -0.02),
vec2(-1.0, 0.0),
vec2(1.0, 0.0),
vec2(0.805, -0.02),
vec2(0.617, -0.067),
vec2(0.45, -0.169),
vec2(0.293, -0.293),
vec2(0.169, -0.45),
vec2(0.076, -0.617),
vec2(0.02, -0.805),
vec2(0.0, -1.0));
const vec2 cornervec[36] = float2_array(vec2(0.0, 1.0),
vec2(0.02, 0.805),
vec2(0.067, 0.617),
vec2(0.169, 0.45),
vec2(0.293, 0.293),
vec2(0.45, 0.169),
vec2(0.617, 0.076),
vec2(0.805, 0.02),
vec2(1.0, 0.0),
vec2(-1.0, 0.0),
vec2(-0.805, 0.02),
vec2(-0.617, 0.067),
vec2(-0.45, 0.169),
vec2(-0.293, 0.293),
vec2(-0.169, 0.45),
vec2(-0.076, 0.617),
vec2(-0.02, 0.805),
vec2(0.0, 1.0),
vec2(0.0, -1.0),
vec2(-0.02, -0.805),
vec2(-0.067, -0.617),
vec2(-0.169, -0.45),
vec2(-0.293, -0.293),
vec2(-0.45, -0.169),
vec2(-0.617, -0.076),
vec2(-0.805, -0.02),
vec2(-1.0, 0.0),
vec2(1.0, 0.0),
vec2(0.805, -0.02),
vec2(0.617, -0.067),
vec2(0.45, -0.169),
vec2(0.293, -0.293),
vec2(0.169, -0.45),
vec2(0.076, -0.617),
vec2(0.02, -0.805),
vec2(0.0, -1.0));
const vec2 center_offset[4] = vec2[4](
const vec2 center_offset[4] = float2_array(
vec2(1.0, 1.0), vec2(-1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0));
uint cflag = vflag & CNR_FLAG_RANGE;

View File

@@ -338,8 +338,8 @@ vec3 dF_impl(vec3 v)
void dF_branch(float fn, out vec2 result)
{
result.x = DFDX_SIGN * dFdx(fn);
result.y = DFDY_SIGN * dFdy(fn);
result.x = dFdx(fn);
result.y = dFdy(fn);
}
#else
@@ -349,10 +349,10 @@ int g_derivative_flag = 0;
vec3 dF_impl(vec3 v)
{
if (g_derivative_flag > 0) {
return DFDX_SIGN * dFdx(v);
return dFdx(v);
}
else if (g_derivative_flag < 0) {
return DFDY_SIGN * dFdy(v);
return dFdy(v);
}
return vec3(0.0);
}

View File

@@ -133,7 +133,7 @@ void main()
/* 3x3 blur */
/* clang-format off */
const float weights3x3[16] = float[16](
const float weights3x3[16] = float_array(
1.0, 2.0, 1.0, 0.0,
2.0, 4.0, 2.0, 0.0,
1.0, 2.0, 1.0, 0.0,
@@ -169,7 +169,7 @@ void main()
/* 5x5 blur */
/* clang-format off */
const float weights5x5[36] = float[36](
const float weights5x5[36] = float_array(
1.0, 2.0, 2.0, 2.0, 1.0, 0.0,
2.0, 5.0, 6.0, 5.0, 2.0, 0.0,
2.0, 6.0, 8.0, 6.0, 2.0, 0.0,

View File

@@ -7,45 +7,45 @@
void normal_transform_object_to_world(vec3 vin, out vec3 vout)
{
/* Expansion of NormalMatrix. */
vout = vin * mat3(ModelMatrixInverse);
vout = vin * to_float3x3(ModelMatrixInverse);
}
void normal_transform_world_to_object(vec3 vin, out vec3 vout)
{
/* Expansion of NormalMatrixInverse. */
vout = vin * mat3(ModelMatrix);
vout = vin * to_float3x3(ModelMatrix);
}
void direction_transform_object_to_world(vec3 vin, out vec3 vout)
{
vout = mat3x3(ModelMatrix) * vin;
vout = to_float3x3(ModelMatrix) * vin;
}
void direction_transform_object_to_view(vec3 vin, out vec3 vout)
{
vout = mat3x3(ModelMatrix) * vin;
vout = mat3x3(ViewMatrix) * vout;
vout = to_float3x3(ModelMatrix) * vin;
vout = to_float3x3(ViewMatrix) * vout;
}
void direction_transform_view_to_world(vec3 vin, out vec3 vout)
{
vout = mat3x3(ViewMatrixInverse) * vin;
vout = to_float3x3(ViewMatrixInverse) * vin;
}
void direction_transform_view_to_object(vec3 vin, out vec3 vout)
{
vout = mat3x3(ViewMatrixInverse) * vin;
vout = mat3x3(ModelMatrixInverse) * vout;
vout = to_float3x3(ViewMatrixInverse) * vin;
vout = to_float3x3(ModelMatrixInverse) * vout;
}
void direction_transform_world_to_view(vec3 vin, out vec3 vout)
{
vout = mat3x3(ViewMatrix) * vin;
vout = to_float3x3(ViewMatrix) * vin;
}
void direction_transform_world_to_object(vec3 vin, out vec3 vout)
{
vout = mat3x3(ModelMatrixInverse) * vin;
vout = to_float3x3(ModelMatrixInverse) * vin;
}
void point_transform_object_to_world(vec3 vin, out vec3 vout)

View File

@@ -13,13 +13,37 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wcomment"
#define ENABLE_IF(cond) thread metal::enable_if_t<(cond)> * = nullptr
/* Base instance with offsets. */
#define gpu_BaseInstance gl_BaseInstanceARB
#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)
/* derivative signs. */
#define DFDX_SIGN 1.0
#define DFDY_SIGN 1.0
#ifdef MTL_WORKGROUP_SIZE_X
/* Older Metal compiler versions don't treat vector component access as constexpr.
* We have to make a wrapper class for that, otherwise we cannot use WorkGroupSize for sizing
* threadgroup arrays. Note that this bug is not present in version 4.1 of the compiler. */
struct mtl_WorkGroupSize {
union {
struct {
uint x, y, z;
};
uint2 xy;
uint3 xyz;
};
constexpr mtl_WorkGroupSize()
: x(MTL_WORKGROUP_SIZE_X), y(MTL_WORKGROUP_SIZE_Y), z(MTL_WORKGROUP_SIZE_Z)
{
}
constexpr inline operator uint3() const
{
return xyz;
}
};
# define gl_WorkGroupSize mtl_WorkGroupSize()
#endif
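/* Illustrative use (assumption, not part of this diff): because each component above is constexpr,
* a declaration such as `threadgroup float cache[gl_WorkGroupSize.x * gl_WorkGroupSize.y];`
* compiles even on Metal compiler versions older than 4.1. */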
/* Type definitions. */
/* int implicitly cast to bool in MSL. */
@@ -53,64 +77,9 @@ using bvec3 = bool3;
using bvec4 = bool4;
/* Compute decorators. */
#define TG threadgroup
#define barrier() \
threadgroup_barrier(mem_flags::mem_threadgroup | mem_flags::mem_device | mem_flags::mem_texture)
#ifdef MTL_USE_WORKGROUP_SIZE
/* Compute work-group size. */
struct constexp_uvec3 {
/* Type union to cover all syntax accessors:
* .x, .y, .z, .xy, .xyz
* Swizzle types invalid.*/
union {
struct {
uint x, y, z;
};
struct {
uint2 xy;
};
uint3 xyz;
};
constexpr constexp_uvec3(uint _x, uint _y, uint _z) : x(_x), y(_y), z(_z) {}
constexpr uint operator[](int i)
{
/* Note: Need to switch on each elem value as array accessor triggers
* non-constant sizing error. This will be statically evaluated at compile time. */
switch (i) {
case 0:
return x;
case 1:
return y;
case 2:
return z;
default:
return 0;
}
}
constexpr inline operator uint3() const
{
return xyz;
}
constexpr inline operator uint2() const
{
return xy;
}
constexpr inline operator uint() const
{
return x;
}
};
constexpr constexp_uvec3 __internal_workgroupsize_get()
{
return constexp_uvec3(MTL_WORKGROUP_SIZE_X, MTL_WORKGROUP_SIZE_Y, MTL_WORKGROUP_SIZE_Z);
}
# define gl_WorkGroupSize __internal_workgroupsize_get()
#endif
/** Shader atomics:
* In order to emulate GLSL-style atomic operations, wherein variables can be used within atomic
* operations, even if they are not explicitly declared atomic, we can cast the pointer to atomic,
@@ -118,162 +87,652 @@ constexpr constexp_uvec3 __internal_workgroupsize_get()
*
* NOTE: We cannot hoist the address space into the template declaration, so these must be declared
* for each relevant address space. */
#define ATOMIC_OP_EX(qualifier, glsl_op, mtl_op) \
template<typename T> T atomic##glsl_op(qualifier T &mem, T data) \
{ \
return atomic_##mtl_op##_explicit((qualifier _atomic<T> *)&mem, data, memory_order_relaxed); \
}
/* Thread-group memory. */
template<typename T> T atomicMax(threadgroup T &mem, T data)
{
return atomic_fetch_max_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicMin(threadgroup T &mem, T data)
{
return atomic_fetch_min_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAdd(threadgroup T &mem, T data)
{
return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicSub(threadgroup T &mem, T data)
{
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(threadgroup T &mem, T data)
{
return atomic_fetch_and_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(threadgroup T &mem, T data)
{
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicXor(threadgroup T &mem, T data)
{
return atomic_fetch_xor_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicExchange(threadgroup T &mem, T data)
{
return atomic_exchange_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
#define ATOMIC_OP(glsl_op, mtl_op) \
ATOMIC_OP_EX(threadgroup, glsl_op, mtl_op) \
ATOMIC_OP_EX(device, glsl_op, mtl_op)
/* Device memory. */
template<typename T> T atomicMax(device T &mem, T data)
{
return atomic_fetch_max_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicMin(device T &mem, T data)
{
return atomic_fetch_min_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAdd(device T &mem, T data)
{
return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicSub(device T &mem, T data)
{
return atomic_fetch_sub_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(device T &mem, T data)
{
return atomic_fetch_and_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(device T &mem, T data)
{
return atomic_fetch_or_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicXor(device T &mem, T data)
{
return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicExchange(device T &mem, T data)
{
return atomic_exchange_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
ATOMIC_OP(Max, fetch_max)
ATOMIC_OP(Min, fetch_min)
ATOMIC_OP(Add, fetch_add)
ATOMIC_OP(Sub, fetch_sub)
ATOMIC_OP(And, fetch_and)
ATOMIC_OP(Or, fetch_or)
ATOMIC_OP(Xor, fetch_xor)
ATOMIC_OP(Exchange, exchange)
/* Used to replace 'out' in function parameters with a thread-local reference,
* shortened to avoid expanding the GLSL source string. */
#define THD thread
#define OUT(type, name, array) thread type(&name)[array]
#undef ATOMIC_OP
/* Generate wrapper structs for combined texture and sampler type. */
#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
# define SAMPLER_DECLARATION constant sampler *samp;
using sampler_ptr = constant sampler *;
#else
# define SAMPLER_DECLARATION thread sampler *samp;
using sampler_ptr = thread sampler *;
#endif
#define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \
template<typename T, access A = access::sample> struct STRUCT_NAME { \
thread TEX_TYPE<T, A> *texture; \
SAMPLER_DECLARATION \
/* Use point sampler instead of texture read to benefit from texture caching and reduce branching
* through removal of bounds tests, as these are handled by the sample operation. */
constexpr sampler _mtl_fetch_samp(address::clamp_to_zero, filter::nearest, coord::pixel);
template<typename T,
access A,
typename TextureT,
bool is_depth,
int Dim,
int Cube,
int Array,
bool Atomic = false>
struct _mtl_sampler {
template<typename U, int S>
using vec_or_scalar = typename metal::conditional<S == 1, U, vec<U, S>>::type;
using FltCoord = vec_or_scalar<float, Dim + Cube + Array>;
using FltDeriv = vec_or_scalar<float, Dim + Cube>;
using IntCoord = vec_or_scalar<int, Dim + Cube + Array>;
using IntDeriv = vec_or_scalar<int, Dim + Cube>;
using UintCoord = vec_or_scalar<uint, Dim + Cube + Array>;
using UintDeriv = vec_or_scalar<uint, Dim + Cube>;
using SizeVec = vec_or_scalar<int, Dim + Array>;
using DataVec = vec<T, 4>;
using AtomicT = T;
/* Template-compatible gradient type selection. */
template<int D, int C> struct gradient_n {};
/* clang-format off */
template<> struct gradient_n<2, 0> { using type = gradient2d; };
template<> struct gradient_n<3, 0> { using type = gradient3d; };
template<> struct gradient_n<2, 1> { using type = gradientcube; };
/* clang-format on */
/* Using `using` would invalidate the whole class. */
#define gradient typename gradient_n<Dim, Cube>::type
#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
/* If native texture atomics are unsupported, we instead utilize a custom type which wraps a
* buffer-backed texture. This texture will always be a Texture2D, but will emulate access to
* Texture3D and Texture2DArray by stacking layers.
* Access pattern will be derived based on the source type. 2DArray and 3D atomic texture
* support will require information on the size of each layer within the source 2D texture.
*
* A device pointer to the backing buffer will also be available for the atomic operations.
* NOTE: For atomic ops, it will only be valid to use access::read_write.
* We still need to use the wrapped type for access::sample, as texture2DArray and texture3D
* will require access indirection.
*
* NOTE: Only type of UINT is valid, but full template provided to match syntax of standard
* textures. */
template<typename U, int D, bool At> struct AtomicEmulation {};
template<typename U> struct AtomicEmulation<U, 2, true> {
/** Buffer to do atomic operations on. */
device U *buffer;
/* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater
* than the texture's native width to satisfy device alignment rules. We need to use the padded
* width when writing to ensure the correct writing location aligns with a given pixel location
* in the texture. */
ushort aligned_width;
int2 to_internal_coord(IntCoord coord) const
{
return coord.xy;
}
uint to_linear_coord(IntCoord coord) const
{
return coord.x + coord.y * aligned_width;
}
};
template<typename U> struct AtomicEmulation<U, 3, true> {
/** Buffer to do atomic operations on. */
device U *buffer;
/** Required to compute the pixel location inside the backing 2D texture space, and for texture size queries. */
ushort3 texture_size;
/* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater
* than the texture's native width to satisfy device alignment rules. We need to use the padded
* width when writing to ensure the correct writing location aligns with a given pixel location
* in the texture. */
ushort aligned_width;
int2 to_internal_coord(IntCoord coord) const
{
return int2(coord.x, coord.y + texture_size.y * coord.z);
}
uint to_linear_coord(IntCoord coord) const
{
uint row = coord.y + coord.z * texture_size.y;
return coord.x + row * aligned_width;
}
};
AtomicEmulation<T, Dim + Array, Atomic> atomic;
#endif
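/* Worked example for the 3D emulation path above: with a per-layer size of
* texture_size = (W, H, D), texel (x, y, z) lives at 2D location (x, y + H * z) inside the backing
* texture, and its buffer element for atomic operations is x + (y + z * H) * aligned_width, where
* aligned_width >= W because of row alignment. */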
thread TextureT *texture;
sampler_ptr samp;
template<int dim, int array, bool At = false> SizeVec size_impl(uint lod) const {}
template<> SizeVec size_impl<1, 0>(uint lod) const
{
return SizeVec(texture->get_width());
}
template<> SizeVec size_impl<1, 1>(uint lod) const
{
return SizeVec(texture->get_width(), texture->get_array_size());
}
template<> SizeVec size_impl<2, 0>(uint lod) const
{
return SizeVec(texture->get_width(lod), texture->get_height(lod));
}
template<> SizeVec size_impl<2, 1>(uint lod) const
{
return SizeVec(texture->get_width(lod), texture->get_height(lod), texture->get_array_size());
}
template<> SizeVec size_impl<3, 0>(uint lod) const
{
return SizeVec(texture->get_width(lod), texture->get_height(lod), texture->get_depth(lod));
}
#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
template<> SizeVec size_impl<2, 1, true>(uint lod) const
{
return SizeVec(atomic.texture_size);
}
template<> SizeVec size_impl<3, 0, true>(uint lod) const
{
return SizeVec(atomic.texture_size);
}
#endif
SizeVec size(int lod = 0) const
{
return size_impl<Dim, Array, Atomic>(uint(lod));
}
/* If native texture atomics are unsupported, we instead utilize a custom type which wraps a
* buffer-backed texture. This texture will always be a Texture2D, but will emulate access to
* Texture3D and Texture2DArray by stacking layers.
* Access pattern will be derived based on the source type. 2DArray and 3D atomic texture
* support will require information on the size of each layer within the source 2D texture.
*
* A device pointer to the backing buffer will also be available for the atomic operations.
* NOTE: For atomic ops, it will only be valid to use access::read_write.
* We still need to use the wrapped type for access::sample, as texture2DArray and texture3D
* will require access indirection.
*
* NOTE: Only type of UINT is valid, but full template provided to match syntax of standard
* textures. */
#define ARRAY_FN \
template<int Ar = Array, bool At = Atomic, ENABLE_IF(Ar == 1), ENABLE_IF(At == false)>
#define NON_ARRAY_FN \
template<int Ar = Array, bool At = Atomic, ENABLE_IF(Ar == 0), ENABLE_IF(At == false)>
NON_ARRAY_FN DataVec sample(FltCoord coord) const
{
return texture->sample(*samp, coord);
}
NON_ARRAY_FN DataVec sample_grad(FltCoord coord, FltDeriv dPdx, FltDeriv dPdy) const
{
return texture->sample(*samp, coord, gradient(dPdx, dPdy));
}
NON_ARRAY_FN DataVec sample_bias(FltCoord coord, bias lod_bias) const
{
return texture->sample(*samp, coord, lod_bias);
}
NON_ARRAY_FN DataVec sample_lod(FltCoord coord, level lod, IntDeriv offset = {0}) const
{
return texture->sample(*samp, coord, lod, offset);
}
NON_ARRAY_FN DataVec gather(FltCoord coord) const
{
return texture->gather(*samp, coord);
}
NON_ARRAY_FN DataVec fetch(IntCoord coord) const
{
return texture->sample(_mtl_fetch_samp, FltCoord(coord));
}
NON_ARRAY_FN DataVec fetch(IntCoord coord, level lod, IntDeriv offset = {0}) const
{
return texture->sample(_mtl_fetch_samp, FltCoord(coord), lod, offset);
}
ARRAY_FN DataVec sample(FltCoord coord) const
{
return texture->sample(*samp, uv_mask(coord), layer_mask(coord));
}
ARRAY_FN DataVec sample_grad(FltCoord coord, FltDeriv dPdx, FltDeriv dPdy) const
{
return texture->sample(*samp, uv_mask(coord), layer_mask(coord), gradient(dPdx, dPdy));
}
ARRAY_FN DataVec sample_bias(FltCoord coord, bias lod_bias) const
{
return texture->sample(*samp, uv_mask(coord), layer_mask(coord), lod_bias);
}
ARRAY_FN DataVec sample_lod(FltCoord coord, level lod, IntDeriv offset = {0}) const
{
return texture->sample(*samp, uv_mask(coord), layer_mask(coord), lod, offset);
}
ARRAY_FN DataVec gather(FltCoord coord) const
{
return texture->gather(*samp, uv_mask(coord), layer_mask(coord));
}
ARRAY_FN DataVec fetch(IntCoord coord) const
{
return texture->sample(_mtl_fetch_samp, uv_mask(coord), layer_mask(coord));
}
ARRAY_FN DataVec fetch(IntCoord coord, level lod, IntDeriv ofs = {0}) const
{
return texture->sample(_mtl_fetch_samp, uv_mask(coord), layer_mask(coord), lod, ofs);
}
#undef gradient
#undef ARRAY_FN
#undef NON_ARRAY_FN
/**
* Image functions.
* To be split to its own class.
*/
#define ARRAY_FN \
template<int Ar = Array, bool At = Atomic, ENABLE_IF(Ar == 1), ENABLE_IF(At == false)>
#define NON_ARRAY_FN \
template<int Ar = Array, bool At = Atomic, ENABLE_IF(Ar == 0), ENABLE_IF(At == false)>
NON_ARRAY_FN DataVec load(IntCoord coord) const
{
return texture->read(UintCoord(coord), 0);
}
NON_ARRAY_FN void store(DataVec data, IntCoord coord) const
{
texture->write(data, UintCoord(coord), 0);
}
ARRAY_FN DataVec load(IntCoord coord) const
{
return texture->read(uv_mask_img(coord), layer_mask_img(coord), 0);
}
ARRAY_FN void store(DataVec data, IntCoord coord) const
{
texture->write(data, uv_mask_img(coord), layer_mask_img(coord), 0);
}
#undef ARRAY_FN
#undef NON_ARRAY_FN
#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
/* Atomic samplers only support `textureFetch` as the texture layout doesn't allow filtering. */
template<typename T, access A = access::sample>
struct _mtl_combined_image_sampler_2d_atomic_fallback {
thread texture2d<T, A> *texture;
SAMPLER_DECLARATION
device T *buffer;
/* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater than
* the texture's native width to satisfy device alignment rules. We need to use the padded width
* when writing to ensure the
* correct writing location aligns with a given pixel location in the texture. */
uint aligned_width;
};
# define ATOMIC_FN template<bool At = Atomic, ENABLE_IF(At == true)>
template<typename T, access A = access::sample>
struct _mtl_combined_image_sampler_2d_array_atomic_fallback {
thread texture2d<T, A> *texture;
SAMPLER_DECLARATION
device T *buffer;
/* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater than
* the texture's native width to satisfy device alignment rules. We need to use the padded width
* when writing to ensure the
* correct writing location aligns with a given pixel location in the texture. */
uint aligned_width;
/* Texture size required to determine location offset of array layer within 2D texture space. */
ushort3 texture_size;
};
ATOMIC_FN DataVec fetch(IntCoord coord) const
{
int2 coord_2d = atomic.to_internal_coord(coord);
return texture->sample(_mtl_fetch_samp, float2(coord_2d));
}
ATOMIC_FN DataVec fetch(IntCoord coord, level lod, IntDeriv offset = {0}) const
{
int2 coord_2d = atomic.to_internal_coord(coord);
return texture->sample(_mtl_fetch_samp, float2(coord_2d), lod, offset);
}
template<typename T, access A = access::sample>
struct _mtl_combined_image_sampler_3d_atomic_fallback {
thread texture2d<T, A> *texture;
SAMPLER_DECLARATION
device T *buffer;
/* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater than
* the texture's native width to satisfy device alignment rules. We need to use the padded width
* when writing to ensure the
* correct writing location aligns with a given pixel location in the texture. */
uint aligned_width;
/* Texture size required to determine location offset of array layer within 2D texture space. */
ushort3 texture_size;
};
ATOMIC_FN DataVec load(IntCoord coord) const
{
int2 coord_2d = atomic.to_internal_coord(coord);
return texture->read(uint2(coord_2d), 0);
}
ATOMIC_FN void store(DataVec data, IntCoord coord) const
{
int2 coord_2d = atomic.to_internal_coord(coord);
texture->write(data, uint2(coord_2d), 0);
}
# undef ATOMIC_FN
AtomicT atomic_min(IntCoord coord, AtomicT data) const
{
return atomicMin(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_max(IntCoord coord, AtomicT data) const
{
return atomicMax(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_add(IntCoord coord, AtomicT data) const
{
return atomicAdd(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_and(IntCoord coord, AtomicT data) const
{
return atomicAnd(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_or(IntCoord coord, AtomicT data) const
{
return atomicOr(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_xor(IntCoord coord, AtomicT data) const
{
return atomicXor(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_exchange(IntCoord coord, AtomicT data) const
{
return atomicExchange(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
#else
# define NON_ARRAY_ATOMIC \
template<typename U = T, \
int Ar = Array, \
ENABLE_IF(metal::is_integral_v<U> == true), \
ENABLE_IF(sizeof(U) == 4), \
ENABLE_IF(Ar == 0)>
# define ARRAY_ATOMIC \
template<typename U = T, \
int Ar = Array, \
ENABLE_IF(metal::is_integral_v<U> == true), \
ENABLE_IF(sizeof(U) == 4), \
ENABLE_IF(Ar == 1)>
NON_ARRAY_ATOMIC AtomicT atomic_min(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_min(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_max(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_max(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_add(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_add(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_and(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_and(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_or(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_or(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_xor(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_xor(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_exchange(IntCoord coord, AtomicT data) const
{
return texture->atomic_exchange(UintCoord(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_min(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_min(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_max(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_max(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_add(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_add(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_and(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_and(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_or(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_or(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_xor(IntCoord coord, AtomicT data) const
{
return texture->atomic_fetch_xor(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_exchange(IntCoord coord, AtomicT data) const
{
return texture->atomic_exchange(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
# undef NON_ARRAY_ATOMIC
# undef ARRAY_ATOMIC
#endif
void fence()
{
texture->fence();
}
private:
template<typename U, typename V> static U reshape(V v) {}
/* clang-format off */
template<> float reshape<float>(float2 v) { return v.x; }
template<> float2 reshape<float2>(float3 v) { return v.xy; }
template<> float3 reshape<float3>(float4 v) { return v.xyz; }
template<> int reshape<int>(int2 v) { return v.x; }
template<> int2 reshape<int2>(int3 v) { return v.xy; }
template<> int3 reshape<int3>(int4 v) { return v.xyz; }
/* clang-format on */
FltDeriv uv_mask(FltCoord coord) const
{
return reshape<FltDeriv>(coord);
}
FltDeriv uv_mask(IntCoord coord) const
{
return FltDeriv(reshape<IntDeriv>(coord));
}
uint layer_mask(FltCoord coord) const
{
return coord[Dim + Cube];
}
uint layer_mask(IntCoord coord) const
{
return coord[Dim + Cube];
}
UintDeriv uv_mask_img(IntCoord coord) const
{
return UintDeriv(reshape<IntDeriv>(coord));
}
uint layer_mask_img(IntCoord coord) const
{
return coord[Dim + Cube];
}
};
/** Sampler functions */
#define SAMPLER_FN \
template<typename SamplerT, \
typename FltCoord = typename SamplerT::FltCoord, \
typename FltDeriv = typename SamplerT::FltDeriv, \
typename IntCoord = typename SamplerT::IntCoord, \
typename IntDeriv = typename SamplerT::IntDeriv, \
typename UintCoord = typename SamplerT::UintCoord, \
typename UintDeriv = typename SamplerT::UintDeriv, \
typename SizeVec = typename SamplerT::SizeVec, \
typename DataVec = typename SamplerT::DataVec>
SAMPLER_FN SizeVec textureSize(SamplerT texture, int lod)
{
return texture.size(lod);
}
SAMPLER_FN DataVec texture(SamplerT texture, FltCoord coord)
{
return texture.sample(coord);
}
SAMPLER_FN DataVec texture(SamplerT texture, FltCoord coord, float lod_bias)
{
return texture.sample_bias(coord, bias(lod_bias));
}
SAMPLER_FN DataVec textureLod(SamplerT texture, FltCoord coord, float lod)
{
return texture.sample_lod(coord, level(lod));
}
SAMPLER_FN DataVec textureLodOffset(SamplerT texture, FltCoord coord, float lod, IntDeriv offset)
{
return texture.sample_lod(coord, level(lod), offset);
}
SAMPLER_FN DataVec textureGather(SamplerT texture, FltCoord coord)
{
return texture.gather(coord);
}
SAMPLER_FN DataVec textureGrad(SamplerT texture, FltCoord coord, FltDeriv dPdx, FltDeriv dPdy)
{
return texture.sample_grad(coord, dPdx, dPdy);
}
SAMPLER_FN DataVec texelFetch(SamplerT texture, IntCoord coord, int lod)
{
return texture.fetch(coord, level(lod));
}
SAMPLER_FN DataVec texelFetchOffset(SamplerT texture, IntCoord coord, int lod, IntDeriv offset)
{
return texture.fetch(coord, level(lod), offset);
}
#undef SAMPLER_FN
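/* Example usage (illustrative): with `tex` of type `_mtl_sampler_2d<float>`, `textureSize(tex, 0)`
* yields an int2, `texture(tex, float2(0.5))` a float4 and `texelFetch(tex, int2(16, 16), 0)` a
* point-sampled float4. */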
/** Image functions */
#define IMAGE_FN \
template<typename SamplerT, \
typename IntCoord = typename SamplerT::IntCoord, \
typename IntDeriv = typename SamplerT::IntDeriv, \
typename UintCoord = typename SamplerT::UintCoord, \
typename UintDeriv = typename SamplerT::UintDeriv, \
typename SizeVec = typename SamplerT::SizeVec, \
typename DataVec = typename SamplerT::DataVec, \
typename AtomicT = typename SamplerT::AtomicT>
IMAGE_FN SizeVec imageSize(SamplerT texture)
{
return texture.size();
}
IMAGE_FN void imageFence(SamplerT texture)
{
return texture.fence();
}
IMAGE_FN DataVec imageLoad(SamplerT texture, IntCoord coord)
{
if (any(UintCoord(coord) >= UintCoord(texture.size()))) {
return DataVec(0);
}
return texture.load(coord);
}
IMAGE_FN DataVec imageLoadFast(SamplerT texture, IntCoord coord)
{
return texture.load(coord);
}
IMAGE_FN void imageStore(SamplerT texture, IntCoord coord, DataVec data)
{
if (any(UintCoord(coord) >= UintCoord(texture.size()))) {
return;
}
texture.store(data, coord);
}
IMAGE_FN
void imageStoreFast(SamplerT texture, IntCoord coord, DataVec data)
{
texture.store(data, coord);
}
IMAGE_FN AtomicT imageAtomicMin(SamplerT texture, IntCoord coord, AtomicT data)
{
return texture.atomic_min(coord, data);
}
IMAGE_FN AtomicT imageAtomicMax(SamplerT texture, IntCoord coord, AtomicT data)
{
return texture.atomic_max(coord, data);
}
IMAGE_FN AtomicT imageAtomicAdd(SamplerT texture, IntCoord coord, AtomicT data)
{
return texture.atomic_add(coord, data);
}
IMAGE_FN AtomicT imageAtomicAnd(SamplerT texture, IntCoord coord, AtomicT data)
{
return texture.atomic_and(coord, data);
}
IMAGE_FN AtomicT imageAtomicOr(SamplerT texture, IntCoord coord, AtomicT data)
{
return texture.atomic_or(coord, data);
}
IMAGE_FN AtomicT imageAtomicXor(SamplerT texture, IntCoord coord, AtomicT data)
{
return texture.atomic_xor(coord, data);
}
IMAGE_FN AtomicT imageAtomicExchange(SamplerT texture, IntCoord coord, AtomicT data)
{
return texture.atomic_exchange(coord, data);
}
#undef IMAGE_FN
/* Add any types as needed. */
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array);
#define TEMPLATE template<typename T = float, access A = access::sample>
TEMPLATE using depth2D = _mtl_sampler<T, A, depth2d<T, A>, true, 2, 0, 0>;
TEMPLATE using depth2DArray = _mtl_sampler<T, A, depth2d_array<T, A>, true, 2, 0, 1>;
TEMPLATE using depthCube = _mtl_sampler<T, A, texturecube<T, A>, true, 2, 1, 1>;
TEMPLATE using depthCubeArray = _mtl_sampler<T, A, texturecube_array<T, A>, true, 2, 1, 1>;
TEMPLATE using sampler1D = _mtl_sampler<T, A, texture1d<T, A>, false, 1, 0, 0>;
TEMPLATE using sampler1DArray = _mtl_sampler<T, A, texture1d_array<T, A>, false, 1, 0, 1>;
TEMPLATE using sampler2D = _mtl_sampler<T, A, texture2d<T, A>, false, 2, 0, 0>;
TEMPLATE using sampler2DArray = _mtl_sampler<T, A, texture2d_array<T, A>, false, 2, 0, 1>;
TEMPLATE using sampler3D = _mtl_sampler<T, A, texture3d<T, A>, false, 3, 0, 0>;
TEMPLATE using samplerBuffer = _mtl_sampler<T, A, texture_buffer<T, A>, false, 1, 0, 0>;
TEMPLATE using samplerCube = _mtl_sampler<T, A, texturecube<T, A>, false, 2, 1, 0>;
TEMPLATE using samplerCubeArray = _mtl_sampler<T, A, texturecube_array<T, A>, false, 2, 1, 1>;
/* Atomic textures are defined as 2D textures with special layout for 3D texture emulation. */
TEMPLATE using sampler2DAtomic = _mtl_sampler<T, A, texture2d<T, A>, false, 2, 0, 0, true>;
TEMPLATE using sampler2DArrayAtomic = _mtl_sampler<T, A, texture2d<T, A>, false, 2, 0, 1, true>;
TEMPLATE using sampler3DAtomic = _mtl_sampler<T, A, texture2d<T, A>, false, 3, 0, 0, true>;
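/* For illustration, reading the template arguments above: `sampler2DArray<float>` has Dim = 2,
* Cube = 0, Array = 1, so its FltCoord/IntCoord are float3/int3 (uv + layer), SizeVec is int3 and
* DataVec is float4; `samplerCube<float>` (Dim = 2, Cube = 1, Array = 0) samples with a float3
* direction instead. */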
/* Used by backend to declare the samplers. Could be removed. */
TEMPLATE using _mtl_sampler_depth_2d = depth2D<T, A>;
TEMPLATE using _mtl_sampler_depth_2d_array = depth2DArray<T, A>;
TEMPLATE using _mtl_sampler_depth_cube = depthCube<T, A>;
TEMPLATE using _mtl_sampler_depth_cube_array = depthCubeArray<T, A>;
TEMPLATE using _mtl_sampler_1d = sampler1D<T, A>;
TEMPLATE using _mtl_sampler_1d_array = sampler1DArray<T, A>;
TEMPLATE using _mtl_sampler_2d = sampler2D<T, A>;
TEMPLATE using _mtl_sampler_2d_array = sampler2DArray<T, A>;
TEMPLATE using _mtl_sampler_3d = sampler3D<T, A>;
TEMPLATE using _mtl_sampler_buffer = samplerBuffer<T, A>;
TEMPLATE using _mtl_sampler_cube = samplerCube<T, A>;
TEMPLATE using _mtl_sampler_cube_array = samplerCubeArray<T, A>;
TEMPLATE using _mtl_sampler_2d_atomic = sampler2DAtomic<T, A>;
TEMPLATE using _mtl_sampler_2d_array_atomic = sampler2DArrayAtomic<T, A>;
TEMPLATE using _mtl_sampler_3d_atomic = sampler3DAtomic<T, A>;
#undef TEMPLATE
/* Variant for 1D samplers. Discard the lod. */
template<typename T, access A>
typename sampler1D<T, A>::DataVec texelFetch(sampler1D<T, A> texture, int coord, int lod = 0)
{
return texture.fetch(coord);
}
/* Variant for 1DArray samplers. Discard the lod. */
template<typename T, access A>
typename sampler1DArray<T, A>::DataVec texelFetch(sampler1DArray<T, A> texture,
int2 coord,
int lod = 0)
{
return texture.fetch(coord);
}
/* Variant for buffer samplers. Discard the lod. */
template<typename T, access A>
typename samplerBuffer<T, A>::DataVec texelFetch(samplerBuffer<T, A> texture,
int coord,
int lod = 0)
{
uint texel = uint(coord);
if (texel < texture.texture->get_width()) {
return texture.texture->read(texel);
}
return typename samplerBuffer<T, A>::DataVec(0);
}
/* Sampler struct for argument buffer. */
#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
@@ -283,51 +742,51 @@ struct SStruct {
#endif
/* Samplers as function parameters. */
#define sampler1D thread _mtl_combined_image_sampler_1d<float>
#define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float>
#define sampler2D thread _mtl_combined_image_sampler_2d<float>
#define depth2D thread _mtl_combined_image_sampler_depth_2d<float>
#define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float>
#define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
#define depth2DArray thread _mtl_combined_image_sampler_depth_2d_array<float>
#define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
#define sampler3D thread _mtl_combined_image_sampler_3d<float>
#define samplerBuffer thread _mtl_combined_image_sampler_buffer<float, access::read>
#define samplerCube thread _mtl_combined_image_sampler_cube<float>
#define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float>
#define sampler1D thread _mtl_sampler_1d<float>
#define sampler1DArray thread _mtl_sampler_1d_array<float>
#define sampler2D thread _mtl_sampler_2d<float>
#define depth2D thread _mtl_sampler_depth_2d<float>
#define sampler2DArray thread _mtl_sampler_2d_array<float>
#define sampler2DArrayShadow thread _mtl_sampler_depth_2d_array<float>
#define depth2DArray thread _mtl_sampler_depth_2d_array<float>
#define depth2DArrayShadow thread _mtl_sampler_depth_2d_array<float>
#define sampler3D thread _mtl_sampler_3d<float>
#define samplerBuffer thread _mtl_sampler_buffer<float, access::read>
#define samplerCube thread _mtl_sampler_cube<float>
#define samplerCubeArray thread _mtl_sampler_cube_array<float>
#define usampler1D thread _mtl_combined_image_sampler_1d<uint>
#define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint>
#define usampler2D thread _mtl_combined_image_sampler_2d<uint>
#define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint>
#define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint>
#define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
#define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
#define usampler3D thread _mtl_combined_image_sampler_3d<uint>
#define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint, access::read>
#define usamplerCube thread _mtl_combined_image_sampler_cube<uint>
#define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint>
#define usampler1D thread _mtl_sampler_1d<uint>
#define usampler1DArray thread _mtl_sampler_1d_array<uint>
#define usampler2D thread _mtl_sampler_2d<uint>
#define udepth2D thread _mtl_sampler_depth_2d<uint>
#define usampler2DArray thread _mtl_sampler_2d_array<uint>
#define usampler2DArrayShadow thread _mtl_sampler_depth_2d_array<uint>
#define udepth2DArrayShadow thread _mtl_sampler_depth_2d_array<uint>
#define usampler3D thread _mtl_sampler_3d<uint>
#define usamplerBuffer thread _mtl_sampler_buffer<uint, access::read>
#define usamplerCube thread _mtl_sampler_cube<uint>
#define usamplerCubeArray thread _mtl_sampler_cube_array<uint>
#define isampler1D thread _mtl_combined_image_sampler_1d<int>
#define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int>
#define isampler2D thread _mtl_combined_image_sampler_2d<int>
#define idepth2D thread _mtl_combined_image_sampler_depth_2d<int>
#define isampler2DArray thread _mtl_combined_image_sampler_2d_array<int>
#define isampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
#define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
#define isampler3D thread _mtl_combined_image_sampler_3d<int>
#define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int, access::read>
#define isamplerCube thread _mtl_combined_image_sampler_cube<int>
#define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int>
#define isampler1D thread _mtl_sampler_1d<int>
#define isampler1DArray thread _mtl_sampler_1d_array<int>
#define isampler2D thread _mtl_sampler_2d<int>
#define idepth2D thread _mtl_sampler_depth_2d<int>
#define isampler2DArray thread _mtl_sampler_2d_array<int>
#define isampler2DArrayShadow thread _mtl_sampler_depth_2d_array<int>
#define idepth2DArrayShadow thread _mtl_sampler_depth_2d_array<int>
#define isampler3D thread _mtl_sampler_3d<int>
#define isamplerBuffer thread _mtl_sampler_buffer<int, access::read>
#define isamplerCube thread _mtl_sampler_cube<int>
#define isamplerCubeArray thread _mtl_sampler_cube_array<int>
#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
/* If texture atomics are unsupported, map atomic types to internal atomic fallback type. */
# define usampler2DArrayAtomic _mtl_combined_image_sampler_2d_array_atomic_fallback<uint>
# define usampler2DAtomic _mtl_combined_image_sampler_2d_atomic_fallback<uint>
# define usampler3DAtomic _mtl_combined_image_sampler_3d_atomic_fallback<uint>
# define isampler2DArrayAtomic _mtl_combined_image_sampler_2d_array_atomic_fallback<int>
# define isampler2DAtomic _mtl_combined_image_sampler_2d_atomic_fallback<int>
# define isampler3DAtomic _mtl_combined_image_sampler_3d_atomic_fallback<int>
# define usampler2DArrayAtomic _mtl_sampler_2d_array_atomic<uint>
# define usampler2DAtomic _mtl_sampler_2d_atomic<uint>
# define usampler3DAtomic _mtl_sampler_3d_atomic<uint>
# define isampler2DArrayAtomic _mtl_sampler_2d_array_atomic<int>
# define isampler2DAtomic _mtl_sampler_2d_atomic<int>
# define isampler3DAtomic _mtl_sampler_3d_atomic<int>
#else
# define usampler2DArrayAtomic usampler2DArray
# define usampler2DAtomic usampler2D
@@ -340,1544 +799,32 @@ struct SStruct {
/* Vector accessor aliases. */
#define st xy
/* Texture functions. */
#define texelFetch _texelFetch_internal
#define texelFetchOffset(__tex, __texel, __lod, __offset) \
_texelFetch_internal(__tex, __texel, __lod, __offset)
#define imageLoad(__image, __coord) _texelFetch_internal(__image, __coord, 0)
#define imageLoadFast(__image, __coord) _texelFetch_internal_fast(__image, __coord, 0)
#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv)
#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias)))
#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod)))
#define textureLodOffset(__tex, __uv, __lod, __offset) \
_texture_internal_level(__tex, __uv, level(float(__lod)), __offset)
#define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0)
#define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp)
#define textureGatherOffset(__tex, __offset, __uv, __comp) \
_texture_gather_internal(__tex, __uv, __comp, __offset)
#define textureGrad(__tex, __uv, __dpdx, __dpdy) \
_texture_grad_internal(__tex, __uv, __dpdx, __dpdy)
#define TEXURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME
#define texture(...) TEXURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__)
#define textureGather(...) TEXURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__)
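/* Expansion sketch (illustrative): the variadic TEXURE_MACRO selects the overload by argument
 * count, so a two-argument call maps to `texture2` and a three-argument call maps to `texture3`:
 *   texture(tex, uv)       -> _texture_internal_samp(tex, uv)
 *   texture(tex, uv, 1.5)  -> _texture_internal_bias(tex, uv, bias(float(1.5)))
 * `tex` and `uv` here are hypothetical sampler and coordinate arguments. */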
/* Texture-write functions. */
#define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value)
#define imageStoreFast(_tex, _coord, _value) _texture_write_internal_fast(_tex, _coord, _value)
/* Texture synchronization functions. */
#define imageFence(image) image.texture->fence()
/* Singular return values from texture functions of type DEPTH are often indexed with either .r or
* .x. This is a lightweight wrapper type for handling this syntax. */
union _msl_return_float {
float r;
float x;
inline operator float() const
{
return r;
}
};
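/* Usage sketch (illustrative): both GLSL spellings below resolve through the union members, so
 * depth reads keep working with either accessor:
 *   float d = texelFetch(depth_tx, texel, 0).r;
 *   float s = texture(shadow_tx, uv).x;
 * `depth_tx`, `shadow_tx`, `texel` and `uv` are hypothetical arguments. */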
/* Add custom texture sampling/reading routines for each type to account for special return cases,
 * e.g. returning a float exposing an `.r` accessor.
 * NOTE: Cannot use template specialization for the input type, as return types are specific to
 * the signature of `tex`. */
/* Use point sampler instead of texture read to benefit from texture caching and reduce branching
* through removal of bounds tests, as these are handled by the sample operation. */
constexpr sampler _point_sample_(address::clamp_to_zero, filter::nearest, coord::pixel);
/* Texture Read via point sampling.
* NOTE: These templates will evaluate first for texture resources bound with sample. */
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, access::sample> tex,
T texel,
uint lod = 0)
{
return tex.texture->sample(_point_sample_, float(texel));
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, access::sample> tex,
T texel,
uint lod,
T offset)
{
return tex.texture->sample(_point_sample_, float(texel + offset));
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
thread _mtl_combined_image_sampler_1d_array<S, access::sample> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0, 0))
{
return tex.texture->sample(_point_sample_, float(texel.x + offset.x), uint(texel.y + offset.y));
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, access::sample> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0))
{
return tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod));
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
thread _mtl_combined_image_sampler_2d_array<S, access::sample> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->sample(
_point_sample_, float2(texel.xy + offset.xy), uint(texel.z + offset.z), level(lod));
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, access::sample> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->sample(_point_sample_, float3(texel.xyz + offset.xyz), level(lod));
}
template<typename T>
inline _msl_return_float _texelFetch_internal(
thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0))
{
_msl_return_float fl = {
tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod))};
return fl;
}
template<typename S, typename T>
inline vec<S, 4> _texture_internal_samp(
thread _mtl_combined_image_sampler_2d_array<S, access::sample> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->sample(
_point_sample_, float2(texel.xy + offset.xy), uint(texel.z + offset.z), level(lod));
}
/* Texture Read via read operation. Required by compute/image-bindings. */
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
T texel,
uint lod = 0)
{
float w = tex.texture->get_width();
if (texel >= 0 && texel < w) {
return tex.texture->read(uint(texel));
}
else {
return vec<S, 4>(0);
}
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
T texel,
uint lod = 0)
{
return tex.texture->read(uint(texel));
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel, uint lod = 0)
{
float w = tex.texture->get_width();
if (texel >= 0 && texel < w) {
return tex.texture->read(uint(texel));
}
else {
return vec<S, 4>(0);
}
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal_fast(
const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel, uint lod = 0)
{
return tex.texture->read(uint(texel));
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
T texel,
uint lod,
T offset)
{
float w = tex.texture->get_width();
if ((texel + offset) >= 0 && (texel + offset) < w) {
/* LODs not supported for 1d textures. This must be zero. */
return tex.texture->read(uint(texel + offset), 0);
}
else {
return vec<S, 4>(0);
}
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
T texel,
uint lod,
T offset)
{
/* LODs not supported for 1d textures. This must be zero. */
return tex.texture->read(uint(texel + offset), 0);
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0, 0))
{
float w = tex.texture->get_width();
float h = tex.texture->get_array_size();
if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
(texel.y + offset.y) < h)
{
/* LODs not supported for 1d textures. This must be zero. */
return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0);
}
else {
return vec<S, 4>(0);
}
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0, 0))
{
/* LODs not supported for 1d textures. This must be zero. */
return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0);
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0))
{
float w = tex.texture->get_width() >> lod;
float h = tex.texture->get_height() >> lod;
if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
(texel.y + offset.y) < h)
{
return tex.texture->read(uint2(texel + offset), lod);
}
else {
return vec<S, 4>(0);
}
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0))
{
return tex.texture->read(uint2(texel + offset), lod);
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
float w = tex.texture->get_width() >> lod;
float h = tex.texture->get_height() >> lod;
float d = tex.texture->get_array_size();
if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
(texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d)
{
return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
}
else {
return vec<S, 4>(0);
}
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
float w = tex.texture->get_width() >> lod;
float h = tex.texture->get_height() >> lod;
float d = tex.texture->get_depth() >> lod;
if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
(texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d)
{
return tex.texture->read(uint3(texel + offset), lod);
}
else {
return vec<S, 4>(0);
}
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->read(uint3(texel + offset), lod);
}
template<typename T, access A>
inline _msl_return_float _texelFetch_internal(
thread _mtl_combined_image_sampler_depth_2d<float, A> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0))
{
float w = tex.texture->get_width() >> lod;
float h = tex.texture->get_height() >> lod;
if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
(texel.y + offset.y) < h)
{
_msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)};
return fl;
}
else {
_msl_return_float fl = {0};
return fl;
}
}
template<typename T, access A>
inline _msl_return_float _texelFetch_internal_fast(
thread _mtl_combined_image_sampler_depth_2d<float, A> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0))
{
_msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)};
return fl;
}
template<typename S, typename T, access A>
inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
float w = tex.texture->get_width() >> lod;
float h = tex.texture->get_height() >> lod;
float d = tex.texture->get_array_size();
if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
(texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d)
{
return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
}
else {
return vec<S, 4>(0);
}
}
/* Sample. */
template<typename T>
inline vec<T, 4> _texture_internal_samp(
thread _mtl_combined_image_sampler_1d<T, access::sample> tex, float u)
{
return tex.texture->sample(*tex.samp, u);
}
inline float4 _texture_internal_samp(
thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, float2 ua)
{
return tex.texture->sample(*tex.samp, ua.x, uint(ua.y));
}
inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d<int, access::sample> tex,
float2 uv)
{
return tex.texture->sample(*tex.samp, uv);
}
inline uint4 _texture_internal_samp(
thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, float2 uv)
{
return tex.texture->sample(*tex.samp, uv);
}
inline float4 _texture_internal_samp(
thread _mtl_combined_image_sampler_2d<float, access::sample> tex, float2 uv)
{
return tex.texture->sample(*tex.samp, uv);
}
inline _msl_return_float _texture_internal_samp(
thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, float2 uv)
{
_msl_return_float fl = {tex.texture->sample(*tex.samp, uv)};
return fl;
}
template<typename T>
inline vec<T, 4> _texture_internal_samp(
thread _mtl_combined_image_sampler_3d<T, access::sample> tex, float3 uvw)
{
return tex.texture->sample(*tex.samp, uvw);
}
template<typename T>
inline vec<T, 4> _texture_internal_samp(
thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, float3 uva)
{
return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z));
}
inline _msl_return_float _texture_internal_samp(
thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float3 uva)
{
_msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))};
return fl;
}
inline _msl_return_float _texture_internal_samp(
thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float4 uvac)
{
_msl_return_float fl = {
tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0))};
return fl;
}
template<typename T>
inline vec<T, 4> _texture_internal_samp(
thread _mtl_combined_image_sampler_cube<T, access::sample> tex, float3 uvs)
{
return tex.texture->sample(*tex.samp, uvs.xyz);
}
template<typename T>
inline vec<T, 4> _texture_internal_samp(
thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, float4 coord_a)
{
return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w));
}
/* Sample Level. */
template<typename T>
inline vec<T, 4> _texture_internal_level(
thread _mtl_combined_image_sampler_1d<T, access::sample> tex,
float u,
level options,
int offset = 0)
{
/* LODs not supported for 1d textures. This must be zero. */
return tex.texture->sample(*tex.samp, u);
}
inline float4 _texture_internal_level(
thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex,
float2 ua,
level options,
int offset = 0)
{
/* LODs not supported for 1d textures. This must be zero. */
return tex.texture->sample(*tex.samp, ua.x, uint(ua.y));
}
inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d<int, access::sample> tex,
float2 uv,
level options,
int2 offset = int2(0))
{
return tex.texture->sample(*tex.samp, uv, options, offset);
}
inline uint4 _texture_internal_level(
thread _mtl_combined_image_sampler_2d<uint, access::sample> tex,
float2 uv,
level options,
int2 offset = int2(0))
{
return tex.texture->sample(*tex.samp, uv, options, offset);
}
inline float4 _texture_internal_level(
thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
float2 uv,
level options,
int2 offset = int2(0))
{
return tex.texture->sample(*tex.samp, uv, options, offset);
}
inline _msl_return_float _texture_internal_level(
thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
float2 uv,
level options,
int2 offset = int2(0))
{
_msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)};
return fl;
}
template<typename T>
inline vec<T, 4> _texture_internal_level(
thread _mtl_combined_image_sampler_3d<T, access::sample> tex,
float3 uvw,
level options = level(0),
int3 offset = int3(0))
{
return tex.texture->sample(*tex.samp, uvw, options, offset);
}
template<typename T>
inline vec<T, 4> _texture_internal_level(
thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex,
float3 uva,
level options = level(0),
int2 offset = int2(0))
{
return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset);
}
inline _msl_return_float _texture_internal_level(
thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
float3 uva,
level options = level(0),
int2 offset = int2(0))
{
_msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)};
return fl;
}
inline _msl_return_float _texture_internal_level(
thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
float4 uvac,
level options = level(0),
int2 offset = int2(0))
{
_msl_return_float fl = {
tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)};
return fl;
}
template<typename T>
inline vec<T, 4> _texture_internal_level(
thread _mtl_combined_image_sampler_cube<T, access::sample> tex,
float3 uvs,
level options = level(0),
int2 offset = int2(0))
{
return tex.texture->sample(*tex.samp, uvs.xyz, options);
}
template<typename T>
inline vec<T, 4> _texture_internal_level(
thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex,
float4 coord_a,
level options = level(0),
int3 offset = int3(0))
{
return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options);
}
/* Sample Bias. */
template<typename T>
inline vec<T, 4> _texture_internal_bias(
thread _mtl_combined_image_sampler_1d<T, access::sample> tex,
float u,
bias options = bias(0.0),
int offset = 0)
{
return tex.texture->sample(*tex.samp, u);
}
inline float4 _texture_internal_bias(
thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
float2 uv,
bias options = bias(0.0),
int2 offset = int2(0))
{
return tex.texture->sample(*tex.samp, uv, options, offset);
}
inline _msl_return_float _texture_internal_bias(
thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
float2 uv,
bias options = bias(0),
int2 offset = int2(0))
{
_msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)};
return fl;
}
/* Texture Gather. */
component int_to_component(const int comp)
{
switch (comp) {
default:
case 0:
return component::x;
case 1:
return component::y;
case 2:
return component::z;
case 3:
return component::w;
}
return component::x;
}
inline float4 _texture_gather_internal(
thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
float2 uv,
const int comp = 0,
int2 offset = int2(0))
{
return tex.texture->gather(*tex.samp, uv, offset);
}
inline float4 _texture_gather_internal(
thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
float3 uva,
const int comp = 0,
int2 offset = int2(0))
{
return tex.texture->gather(*tex.samp, uva.xy, uint(uva.z), offset);
}
template<typename T>
inline vec<T, 4> _texture_gather_internal(
thread _mtl_combined_image_sampler_2d<T, access::sample> tex,
float2 uv,
const int comp = 0,
int2 offset = int2(0))
{
return tex.texture->gather(*tex.samp, uv, offset);
}
template<typename T>
inline vec<T, 4> _texture_gather_internal(
thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex,
float3 uva,
const int comp = 0,
int2 offset = int2(0))
{
return tex.texture->gather(*tex.samp, uva.xy, uint(uva.z), offset);
}
/* Texture Grad. */
inline float4 _texture_grad_internal(
thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
float2 uv,
float2 dpdx,
float2 dpdy)
{
return tex.texture->sample(*tex.samp, uv, gradient2d(dpdx, dpdy));
}
inline float4 _texture_grad_internal(
thread _mtl_combined_image_sampler_2d_array<float, access::sample> tex,
float3 uva,
float2 dpdx,
float2 dpdy)
{
return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), gradient2d(dpdx, dpdy));
}
inline float4 _texture_grad_internal(
thread _mtl_combined_image_sampler_3d<float, access::sample> tex,
float3 uvw,
float3 dpdx,
float3 dpdy)
{
return tex.texture->sample(*tex.samp, uvw, gradient3d(dpdx, dpdy));
}
/* Texture write support. */
template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
T _coord,
vec<S, 4> value)
{
float w = tex.texture->get_width();
if (_coord >= 0 && _coord < w) {
tex.texture->write(value, uint(_coord));
}
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
T _coord,
vec<S, 4> value)
{
tex.texture->write(value, uint(_coord));
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
T _coord,
S value)
{
tex.texture->write(value, uint(_coord));
}
template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
T _coord,
vec<S, 4> value)
{
float w = tex.texture->get_width();
float h = tex.texture->get_height();
if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) {
tex.texture->write(value, uint2(_coord.xy));
}
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
T _coord,
vec<S, 4> value)
{
tex.texture->write(value, uint2(_coord.xy));
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
T _coord,
S value)
{
tex.texture->write(value, uint2(_coord.xy));
}
template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
T _coord,
vec<S, 4> value)
{
float w = tex.texture->get_width();
float h = tex.texture->get_height();
float d = tex.texture->get_array_size();
if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
_coord.z < d)
{
tex.texture->write(value, uint2(_coord.xy), _coord.z);
}
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
T _coord,
vec<S, 4> value)
{
tex.texture->write(value, uint2(_coord.xy), _coord.z);
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
T _coord,
S value)
{
tex.texture->write(value, uint2(_coord.xy), _coord.z);
}
template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
T _coord,
vec<S, 4> value)
{
float w = tex.texture->get_width();
float h = tex.texture->get_height();
float d = tex.texture->get_depth();
if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
_coord.z < d)
{
tex.texture->write(value, uint3(_coord.xyz));
}
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
T _coord,
vec<S, 4> value)
{
tex.texture->write(value, uint3(_coord.xyz));
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
T _coord,
S value)
{
tex.texture->write(value, uint3(_coord.xyz));
}
/* Texture atomic operations are only supported in Metal 3.1 and onward (macOS 14.0 Sonoma). */
#ifdef MTL_SUPPORTS_TEXTURE_ATOMICS
/* Image atomic operations. */
# define imageAtomicMin(tex, coord, data) _texture_image_atomic_min_internal(tex, coord, data)
# define imageAtomicAdd(tex, coord, data) _texture_image_atomic_add_internal(tex, coord, data)
# define imageAtomicExchange(tex, coord, data) \
_texture_image_atomic_exchange_internal(tex, coord, data)
# define imageAtomicXor(tex, coord, data) _texture_image_atomic_xor_internal(tex, coord, data)
# define imageAtomicOr(tex, coord, data) _texture_image_atomic_or_internal(tex, coord, data)
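/* Usage sketch (illustrative): the GLSL entry points map one to one onto the templated wrappers
 * below. For a hypothetical unsigned integer image binding `counter_img` and coordinate `texel`:
 *   uint prev = imageAtomicAdd(counter_img, texel, 1u);
 * The returned value is the texel content before the atomic operation was applied. */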
/* Atomic OR. */
template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_fetch_or(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_or(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_or(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_or(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_or(uint3(coord), vec<S, 4>(data)).x;
}
/* Atomic XOR. */
template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_fetch_xor(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_xor(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_xor(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_xor(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_xor(uint3(coord), vec<S, 4>(data)).x;
}
/* Atomic Min. */
template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_fetch_min(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_min(uint3(coord), vec<S, 4>(data)).x;
}
/* Atomic Add. */
template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_fetch_add(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_add(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_fetch_add(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_add(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_fetch_add(uint3(coord), vec<S, 4>(data)).x;
}
/* Atomic Exchange. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
int coord,
S data)
{
return tex.texture->atomic_exchange(uint(coord), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
int2 coord,
S data)
{
return tex.texture->atomic_exchange(uint2(coord.xy), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
int3 coord,
S data)
{
return tex.texture->atomic_exchange(uint3(coord), vec<S, 4>(data)).x;
}
#else
/**
* Texture atomic fallback function entry points.
* NOTE: When texture atomics are unsupported, the wrapped type contains a buffer-backed 2D
* texture. Atomic operations happen directly on the underlying buffer, and texture coordinates are
* remapped into 2D texture space from 2D Array or 3D texture coordinates.
*/
/* Image atomic operations. */
# define imageAtomicMin(tex, coord, data) \
_texture_image_atomic_min_internal_fallback(tex, coord, data)
# define imageAtomicAdd(tex, coord, data) \
_texture_image_atomic_add_internal_fallack(tex, coord, data)
# define imageAtomicExchange(tex, coord, data) \
_texture_image_atomic_exchange_internal_fallack(tex, coord, data)
# define imageAtomicXor(tex, coord, data) \
_texture_image_atomic_xor_internal_fallack(tex, coord, data)
# define imageAtomicOr(tex, coord, data) \
_texture_image_atomic_or_internal_fallack(tex, coord, data)
/** Pixel address location remapping. */
/* Map 2D/3D texture coordinate into a linear pixel ID. */
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
uint2 coord)
{
return (coord.x + coord.y * uint(tex.texture->get_width()));
}
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
int2 coord)
{
return tex_coord_to_linear_px(tex, uint2(coord));
}
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
uint3 coord)
{
return (coord.x + coord.y * tex.texture_size.x +
coord.z * (tex.texture_size.x * tex.texture_size.y));
}
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
int3 coord)
{
return tex_coord_to_linear_px(tex, uint3(coord));
}
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
uint3 coord)
{
return (coord.x + coord.y * tex.texture_size.x +
coord.z * (tex.texture_size.x * tex.texture_size.y));
}
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
int3 coord)
{
return tex_coord_to_linear_px(tex, uint3(coord));
}
/* Map 3D texture coordinate into 2D texture space. */
template<typename S, access A>
uint2 tex_coord_3d_to_2d(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
uint3 coord)
{
uint linear_id = tex_coord_to_linear_px(tex, coord);
uint tex_full_w = uint(tex.texture->get_width());
uint2 out_2dcoord;
out_2dcoord.y = linear_id / tex_full_w;
out_2dcoord.x = linear_id - (out_2dcoord.y * tex_full_w);
return out_2dcoord;
}
template<typename S, access A>
uint2 tex_coord_3d_to_2d(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
uint3 coord)
{
uint linear_id = tex_coord_to_linear_px(tex, coord);
uint tex_full_w = uint(tex.texture->get_width());
uint2 out_2dcoord;
out_2dcoord.y = linear_id / tex_full_w;
out_2dcoord.x = linear_id - (out_2dcoord.y * tex_full_w);
return out_2dcoord;
}
template<int N> bool in_range(vec<int, N> value, vec<int, N> min, vec<int, N> max)
{
return (all(value >= min) && all(value < max));
}
/* Map 2D/3D texture coordinate into buffer index, accounting for padded row widths. */
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
uint2 coord)
{
return (coord.x + coord.y * uint(tex.aligned_width));
}
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
int2 coord)
{
return tex_coord_to_linear_buffer_id(tex, uint2(coord));
}
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, uint3 coord)
{
uint2 coord2d = tex_coord_3d_to_2d(tex, coord);
return (coord2d.x + coord2d.y * uint(tex.aligned_width));
}
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord)
{
return tex_coord_to_linear_buffer_id(tex, uint3(coord));
}
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
uint3 coord)
{
uint2 coord2d = tex_coord_3d_to_2d(tex, coord);
return (coord2d.x + coord2d.y * uint(tex.aligned_width));
}
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
int3 coord)
{
return tex_coord_to_linear_buffer_id(tex, uint3(coord));
}
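/* Worked sketch (illustrative, hypothetical sizes): for a fallback 2D array texture with
 * `texture_size = (4, 4, 2)`, a backing 2D texture of width 16 and `aligned_width = 16`,
 * the coordinate (1, 2, 1) remaps as follows:
 *   linear pixel id = 1 + 2 * 4 + 1 * (4 * 4)  = 25
 *   2D coordinate   = (25 % 16, 25 / 16)       = (9, 1)
 *   buffer index    = 9 + 1 * aligned_width    = 25
 * With a padded `aligned_width` larger than the backing texture width, the buffer index diverges
 * from the linear pixel id, which is why the two mappings are kept separate. */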
/* imageAtomicMin. */
template<typename S, access A>
S _texture_image_atomic_min_internal_fallback(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
{
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicMin(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_min_internal_fallback(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicMin(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_min_internal_fallback(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicMin(tex.buffer[linear_id], data);
}
/* imageAtomicAdd. */
template<typename S, access A>
S _texture_image_atomic_add_internal_fallack(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
{
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicAdd(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_add_internal_fallack(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicAdd(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_add_internal_fallack(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicAdd(tex.buffer[linear_id], data);
}
/* imageAtomicExchange. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallack(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
{
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicExchange(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallack(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicExchange(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallack(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicExchange(tex.buffer[linear_id], data);
}
/* imageAtomicXor. */
template<typename S, access A>
S _texture_image_atomic_xor_internal_fallack(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
{
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicXor(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_xor_internal_fallack(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicXor(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_xor_internal_fallack(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicXor(tex.buffer[linear_id], data);
}
/* imageAtomicOr. */
template<typename S, access A>
S _texture_image_atomic_or_internal_fallack(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
{
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicOr(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_or_internal_fallack(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicOr(tex.buffer[linear_id], data);
}
template<typename S, access A>
S _texture_image_atomic_or_internal_fallack(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
return S(0);
}
uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
return atomicOr(tex.buffer[linear_id], data);
}
/** Texture sampling, reading and writing functions with layer mapping. */
/* Texel Fetch. */
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, access::sample> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0))
{
return tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod));
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, access::sample> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->sample(
_point_sample_, float2(tex_coord_3d_to_2d(tex, uint3(texel + offset))), level(lod));
}
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, access::sample> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->sample(
_point_sample_, float2(tex_coord_3d_to_2d(tex, uint3(texel + offset))), level(lod));
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
vec<T, 2> texel,
uint lod,
vec<T, 2> offset = vec<T, 2>(0))
{
float w = tex.texture->get_width() >> lod;
float h = tex.texture->get_height() >> lod;
if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
(texel.y + offset.y) < h)
{
return tex.texture->read(uint2(texel + offset), lod);
}
else {
return vec<S, 4>(0);
}
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->read(tex_coord_3d_to_2d(tex, uint3(texel + offset)), lod);
}
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
vec<T, 3> texel,
uint lod,
vec<T, 3> offset = vec<T, 3>(0))
{
return tex.texture->read(tex_coord_3d_to_2d(tex, uint3(texel + offset)), lod);
}
/* imageStore. */
template<typename S, typename T, access A>
inline void _texture_write_internal(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
{
float w = tex.texture_size.x;
float h = tex.texture_size.y;
if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) {
tex.texture->write(value, uint2(_coord.xy));
}
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(
thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
{
tex.texture->write(value, uint2(_coord.xy));
}
template<typename S, typename T, access A>
inline void _texture_write_internal(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
T _coord,
vec<S, 4> value)
{
float w = tex.texture_size.x;
float h = tex.texture_size.y;
float d = tex.texture_size.z;
if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
_coord.z < d)
{
tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
}
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(
thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
T _coord,
vec<S, 4> value)
{
tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
}
template<typename S, typename T, access A>
inline void _texture_write_internal(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
{
float w = tex.texture_size.x;
float h = tex.texture_size.y;
float d = tex.texture_size.z;
if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
_coord.z < d)
{
tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
}
}
template<typename S, typename T, access A>
inline void _texture_write_internal_fast(
thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
{
tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
}
#endif
/* Matrix compare operators. */
/** TODO(fclem): Template. */
inline bool operator==(float4x4 a, float4x4 b)
{
for (int i = 0; i < 4; i++) {
if (any(a[i] != b[i])) {
return false;
}
#define EQ_OP(type, ...) \
inline bool operator==(type a, type b) \
{ \
return __VA_ARGS__; \
}
return true;
}
inline bool operator==(float3x3 a, float3x3 b)
{
for (int i = 0; i < 3; i++) {
if (any(a[i] != b[i])) {
return false;
}
}
return true;
}
inline bool operator==(float2x2 a, float2x2 b)
{
for (int i = 0; i < 2; i++) {
if (any(a[i] != b[i])) {
return false;
}
}
return true;
}
EQ_OP(float2x2, all(a[0] == b[0]) && all(a[1] == b[1]))
EQ_OP(float2x3, all(a[0] == b[0]) && all(a[1] == b[1]))
EQ_OP(float2x4, all(a[0] == b[0]) && all(a[1] == b[1]))
EQ_OP(float3x2, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
EQ_OP(float3x3, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
EQ_OP(float3x4, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
EQ_OP(float4x2, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
EQ_OP(float4x3, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
EQ_OP(float4x4, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
#undef EQ_OP
inline bool operator!=(float4x4 a, float4x4 b)
{
return !(a == b);
}
inline bool operator!=(float3x3 a, float3x3 b)
{
return !(a == b);
}
inline bool operator!=(float2x2 a, float2x2 b)
template<int N, int M> inline bool operator!=(matrix<float, N, M> a, matrix<float, N, M> b)
{
return !(a == b);
}
/* Matrix unary minus operator. */
inline float4x4 operator-(float4x4 a)
template<int N, int M> inline matrix<float, N, M> operator-(matrix<float, N, M> a)
{
float4x4 b;
for (int i = 0; i < 4; i++) {
b[i] = -a[i];
}
return b;
}
inline float3x3 operator-(float3x3 a)
{
float3x3 b;
for (int i = 0; i < 3; i++) {
b[i] = -a[i];
}
return b;
}
inline float2x2 operator-(float2x2 a)
{
float2x2 b;
for (int i = 0; i < 2; i++) {
b[i] = -a[i];
}
return b;
return a * -1.0;
}
/* SSBO Vertex Fetch Mode. */
@@ -1954,115 +901,29 @@ inline float2x2 operator-(float2x2 a)
/* Common Functions. */
#define dFdx(x) dfdx(x)
#define dFdy(x) dfdy(x)
#define mod(x, y) _mtlmod(x, y)
#define discard discard_fragment()
#define inversesqrt rsqrt
inline float radians(float deg)
{
/* Constant factor: M_PI_F/180.0. */
return deg * 0.01745329251f;
}
inline float degrees(float rad)
{
/* Constant factor: 180.0/M_PI_F. */
return rad * 57.2957795131;
}
#define select(A, B, C) mix(A, B, C)
/* Type conversions and type truncation. */
inline float4 to_float4(float3 val)
{
return float4(val, 1.0);
}
/* Type conversions and type truncation (Utility Functions). */
inline float3x3 mat4_to_mat3(float4x4 matrix)
{
return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz);
}
inline int floatBitsToInt(float f)
{
return as_type<int>(f);
}
inline int2 floatBitsToInt(float2 f)
{
return as_type<int2>(f);
}
inline int3 floatBitsToInt(float3 f)
{
return as_type<int3>(f);
}
inline int4 floatBitsToInt(float4 f)
{
return as_type<int4>(f);
}
inline uint floatBitsToUint(float f)
{
return as_type<uint>(f);
}
inline uint2 floatBitsToUint(float2 f)
{
return as_type<uint2>(f);
}
inline uint3 floatBitsToUint(float3 f)
{
return as_type<uint3>(f);
}
inline uint4 floatBitsToUint(float4 f)
{
return as_type<uint4>(f);
}
inline float intBitsToFloat(int f)
{
return as_type<float>(f);
}
inline float2 intBitsToFloat(int2 f)
{
return as_type<float2>(f);
}
inline float3 intBitsToFloat(int3 f)
{
return as_type<float3>(f);
}
inline float4 intBitsToFloat(int4 f)
{
return as_type<float4>(f);
}
inline float uintBitsToFloat(uint f)
{
return as_type<float>(f);
}
inline float2 uintBitsToFloat(uint2 f)
{
return as_type<float2>(f);
}
inline float3 uintBitsToFloat(uint3 f)
{
return as_type<float3>(f);
}
inline float4 uintBitsToFloat(uint4 f)
{
return as_type<float4>(f);
}
/* clang-format off */
inline float radians(float deg) { return deg * 0.01745329251; /* M_PI_F / 180 */ }
inline float degrees(float rad) { return rad * 57.2957795131; /* 180 / M_PI_F */ }
inline int floatBitsToInt(float f) { return as_type<int>(f); }
inline int2 floatBitsToInt(float2 f) { return as_type<int2>(f); }
inline int3 floatBitsToInt(float3 f) { return as_type<int3>(f); }
inline int4 floatBitsToInt(float4 f) { return as_type<int4>(f); }
inline uint floatBitsToUint(float f) { return as_type<uint>(f); }
inline uint2 floatBitsToUint(float2 f) { return as_type<uint2>(f); }
inline uint3 floatBitsToUint(float3 f) { return as_type<uint3>(f); }
inline uint4 floatBitsToUint(float4 f) { return as_type<uint4>(f); }
inline float intBitsToFloat(int f) { return as_type<float>(f); }
inline float2 intBitsToFloat(int2 f) { return as_type<float2>(f); }
inline float3 intBitsToFloat(int3 f) { return as_type<float3>(f); }
inline float4 intBitsToFloat(int4 f) { return as_type<float4>(f); }
inline float uintBitsToFloat(uint f) { return as_type<float>(f); }
inline float2 uintBitsToFloat(uint2 f) { return as_type<float2>(f); }
inline float3 uintBitsToFloat(uint3 f) { return as_type<float3>(f); }
inline float4 uintBitsToFloat(uint4 f) { return as_type<float4>(f); }
/* clang-format on */
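/* Round-trip sketch (illustrative): the bit-cast pairs are exact inverses, e.g.
 *   uint bits = floatBitsToUint(1.0f);  // 0x3F800000u
 *   float one = uintBitsToFloat(bits);  // 1.0f again, bit for bit
 * which matches the GLSL semantics of reinterpreting the raw IEEE-754 representation. */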
#define bitfieldReverse reverse_bits
#define bitfieldExtract extract_bits
@@ -2093,89 +954,32 @@ template<typename T> int findMSB(T x)
return int(sizeof(T) * 8) - 1 - int(clz(x));
}
#define unpackUnorm4x8 unpack_unorm4x8_to_float
#define unpackSnorm4x8 unpack_snorm4x8_to_float
#define unpackUnorm2x16 unpack_unorm2x16_to_float
#define unpackSnorm2x16 unpack_snorm2x16_to_float
/* Texture size functions. Add texture types as needed. */
#define imageSize(image) textureSize(image, 0)
template<typename T, access A>
int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod)
{
return int(image.texture->get_width());
}
template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod)
{
return int2(image.texture->get_width(), image.texture->get_array_size());
}
template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod)
{
return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}
template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod)
{
return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}
template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod)
{
return int3(image.texture->get_width(lod),
image.texture->get_height(lod),
image.texture->get_array_size());
}
template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod)
{
return int3(image.texture->get_width(lod),
image.texture->get_height(lod),
image.texture->get_array_size());
}
template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod)
{
return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}
template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod)
{
return int3(image.texture->get_width(lod),
image.texture->get_height(lod),
image.texture->get_depth(lod));
}
#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
/* textureSize functions for fallback atomic textures. */
template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_2d_atomic_fallback<T, A> image, uint lod)
int2 textureSize(thread _mtl_sampler_2d_atomic<T, A> image, uint lod)
{
return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}
template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<T, A> image, uint lod)
int3 textureSize(thread _mtl_sampler_2d_array_atomic<T, A> image, uint lod)
{
return int3(image.texture_size);
}
template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_3d_atomic_fallback<T, A> image, uint lod)
int3 textureSize(thread _mtl_sampler_3d_atomic<T, A> image, uint lod)
{
return int3(image.texture_size);
}
#endif
#define unpackUnorm4x8 unpack_unorm4x8_to_float
#define unpackSnorm4x8 unpack_snorm4x8_to_float
#define unpackUnorm2x16 unpack_unorm2x16_to_float
#define unpackSnorm2x16 unpack_snorm2x16_to_float
/* Equality and comparison functions. */
#define lessThan(a, b) ((a) < (b))
#define lessThanEqual(a, b) ((a) <= (b))
@@ -2184,49 +988,18 @@ int3 textureSize(thread _mtl_combined_image_sampler_3d_atomic_fallback<T, A> ima
#define equal(a, b) ((a) == (b))
#define notEqual(a, b) ((a) != (b))
template<typename T, int n> bool all(vec<T, n> x)
{
bool _all = true;
for (int i = 0; i < n; i++) {
_all = _all && (x[i] > 0);
}
return _all;
}
template<typename T, int n> bool any(vec<T, n> x)
{
bool _any = false;
for (int i = 0; i < n; i++) {
_any = _any || (x[i] > 0);
}
return _any;
}
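/* Usage sketch (illustrative): the comparison macros return component-wise results that feed
 * directly into `all()`/`any()`, mirroring GLSL:
 *   if (all(lessThan(uv, vec2(1.0))) && any(notEqual(col, vec4(0.0)))) { ... }
 * `uv` and `col` are hypothetical local variables. */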
/* Modulo functionality. */
int _mtlmod(int a, int b)
{
return a - b * (a / b);
}
float _mtlmod(float a, float b)
{
return a - b * floor(a / b);
}
template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y)
{
return x - (y * floor(x / y));
}
template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y)
{
return x - (vec<T, n>(y) * floor(x / vec<T, n>(y)));
}
template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y)
{
return vec<U, n>(x) - (y * floor(vec<U, n>(x) / y));
}
/* GLSL defines `mod(x, y)` as `x - (y * floor(x / y))`, whereas Metal's `fmod` truncates towards
 * zero. The results differ for operands of opposite signs, so override `mod` with a
 * GLSL-compatible implementation. */
#define mod(x, y) _compatible_mod(x, y)
#define MOD \
{ \
return x - y * floor(x / y); \
}
float _compatible_mod(float x, float y) MOD;
template<int S> vec<float, S> _compatible_mod(vec<float, S> x, float y) MOD;
template<int S> vec<float, S> _compatible_mod(vec<float, S> x, vec<float, S> y) MOD;
#undef MOD
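/* Worked sketch: the two definitions only differ when the operands have opposite signs, e.g.
 *   _compatible_mod(-1.0, 3.0) = -1.0 - 3.0 * floor(-1.0 / 3.0) =  2.0   (GLSL `mod`)
 *   fmod(-1.0, 3.0)            = -1.0 - 3.0 * trunc(-1.0 / 3.0) = -1.0   (Metal `fmod`) */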
/* Mathematical functions. */
template<typename T> T atan(T y, T x)
@@ -2249,10 +1022,7 @@ float4x4 inverse(float4x4 a)
float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1];
float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1];
float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2];
float inv_det = 1.0 / (b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06);
float4x4 adjoint{};
float4x4 adjoint;
adjoint[0][0] = a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09;
adjoint[0][1] = a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09;
adjoint[0][2] = a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03;
@@ -2269,40 +1039,38 @@ float4x4 inverse(float4x4 a)
adjoint[3][1] = a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06;
adjoint[3][2] = a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00;
adjoint[3][3] = a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00;
return adjoint * inv_det;
float determinant = b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06;
  /* Multiply by the reciprocal of the determinant since matrix types have no scalar division
   * operator. */
return adjoint * (1.0 / determinant);
}
float3x3 inverse(float3x3 m)
{
float b00 = m[1][1] * m[2][2] - m[2][1] * m[1][2];
float b01 = m[0][1] * m[2][2] - m[2][1] * m[0][2];
float b02 = m[0][1] * m[1][2] - m[1][1] * m[0][2];
float inv_det = 1.0 / (m[0][0] * b00 - m[1][0] * b01 + m[2][0] * b02);
float3x3 adjoint{};
adjoint[0][0] = +b00;
adjoint[0][1] = -b01;
adjoint[0][2] = +b02;
float3x3 adjoint;
adjoint[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]);
adjoint[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]);
adjoint[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]);
adjoint[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]);
adjoint[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]);
adjoint[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]);
adjoint[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]);
adjoint[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]);
adjoint[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]);
return adjoint * inv_det;
float determinant = m[0][0] * adjoint[0][0] + m[1][0] * adjoint[0][1] + m[2][0] * adjoint[0][2];
  /* Multiply by the reciprocal of the determinant since matrix types have no scalar division
   * operator. */
return adjoint * (1.0 / determinant);
}
float2x2 inverse(float2x2 m)
{
float inv_det = 1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]);
float2x2 adjoint{};
float2x2 adjoint;
adjoint[0][0] = +m[1][1];
adjoint[1][0] = -m[1][0];
adjoint[0][1] = -m[0][1];
adjoint[1][1] = +m[0][0];
return adjoint * inv_det;
float determinant = m[0][0] * m[1][1] - m[1][0] * m[0][1];
  /* Multiply by the reciprocal of the determinant since matrix types have no scalar division
   * operator. */
return adjoint * (1.0 / determinant);
}
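/* Verification sketch (illustrative): for any invertible `m`, `m * inverse(m)` reproduces the
 * identity up to floating-point error, e.g.
 *   float3x3 m = float3x3(float3(2, 0, 0), float3(0, 3, 0), float3(1, 1, 1));
 *   float3x3 id = m * inverse(m);  // approximately float3x3(1) */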
/* Additional overloads for builtin functions. */
@@ -2311,226 +1079,70 @@ float distance(float x, float y)
return abs(y - x);
}
/* Overload for mix(A, B, float ratio). */
template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val)
/* Overload for mix(vec<T>, vec<T>, float). */
template<typename T, int S> vec<T, S> mix(vec<T, S> a, vec<T, S> b, float fac)
{
return mix(a, b, vec<T, Size>(val));
}
/* Overload for mix(A, B, bvec<N>). */
template<typename T, int Size>
vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<int, Size> mask)
{
vec<T, Size> result;
for (int i = 0; i < Size; i++) {
result[i] = mask[i] ? b[i] : a[i];
}
return result;
return a * (1.0 - fac) + b * fac;
}
/* Using vec<bool, S> does not appear to work, so the cases are split per vector size. */
/* Overload for mix(A, B, bvec<N>). */
template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask)
{
vec<T, 4> result;
for (int i = 0; i < 4; i++) {
result[i] = mask[i] ? b[i] : a[i];
#define SELECT \
{ \
return select(a, b, mask); \
}
return result;
}
/* Overload for mix(A, B, bvec<N>). */
template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask)
{
vec<T, 3> result;
for (int i = 0; i < 3; i++) {
result[i] = mask[i] ? b[i] : a[i];
}
return result;
}
template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bool4 mask) SELECT;
template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bool3 mask) SELECT;
template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bool2 mask) SELECT;
/* Overload for mix(A, B, bvec<N>). */
template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask)
{
vec<T, 2> result;
for (int i = 0; i < 2; i++) {
result[i] = mask[i] ? b[i] : a[i];
}
return result;
}
/* Overload for mix(A, B, bvec<N>). */
template<typename T> T mix(T a, T b, bool mask)
{
return (mask) ? b : a;
}
#undef SELECT
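/* Illustrative usage (not additional library code): with a boolean mask, mix() acts as a
 * per-component select that picks from `b` where the mask is true, e.g.
 * mix(float3(0.0), float3(1.0), bool3(true, false, true)) == float3(1.0, 0.0, 1.0),
 * which is exactly what Metal's select(a, b, mask) computes. */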
template<typename T, int Size> bool is_zero(vec<T, Size> a)
{
for (int i = 0; i < Size; i++) {
if (a[i] != T(0)) {
return false;
}
return all(a == vec<T, Size>(0));
}
#define in
#define out thread
#define inout thread
#define _in_sta
#define _in_end
#define _out_sta (&
#define _out_end )
#define _inout_sta (&
#define _inout_end )
#define shared threadgroup
#define _shared_sta (&
#define _shared_end )
/* Defines generated by glsl_preprocess that contain the codegen related to threadgroup
 * variables. See glsl_preprocess for more details. */
#define MSL_SHARED_VARS_ARGS
#define MSL_SHARED_VARS_ASSIGN
#define MSL_SHARED_VARS_DECLARE
#define MSL_SHARED_VARS_PASS
/* Matrix reshaping functions. */
#define RESHAPE(mat_to, mat_from, ...) \
mat_to to_##mat_to(mat_from m) \
{ \
return mat_to(__VA_ARGS__); \
}
return true;
}
/**
 * Matrix conversion fallbacks for functional-style casting & constructors.
 * To avoid name collisions with the types, the constructors are replaced with their uppercase
 * versions before compilation.
 */
/* clang-format off */
RESHAPE(float2x2, float3x3, m[0].xy, m[1].xy)
RESHAPE(float2x2, float4x4, m[0].xy, m[1].xy)
RESHAPE(float3x3, float4x4, m[0].xyz, m[1].xyz, m[2].xyz)
RESHAPE(float3x3, float2x2, m[0].x, m[0].y, 0, m[1].x, m[1].y, 0, 0, 0, 1)
RESHAPE(float4x4, float2x2, m[0].x, m[0].y, 0, 0, m[1].x, m[1].y, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1)
RESHAPE(float4x4, float3x3, m[0].x, m[0].y, m[0].z, 0, m[1].x, m[1].y, m[1].z, 0, m[2].x, m[2].y, m[2].z, 0, 0, 0, 0, 1)
/* clang-format on */
/* TODO(fclem): Remove. Use Transform instead. */
RESHAPE(float3x3, float3x4, m[0].xyz, m[1].xyz, m[2].xyz)
#undef RESHAPE
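/* For illustration (sketch of the expansion, not extra library code): the first RESHAPE line
 * above expands to
 *   float2x2 to_float2x2(float3x3 m) { return float2x2(m[0].xy, m[1].xy); }
 * so shaders can call to_float2x2() uniformly on both backends. */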
mat2 MAT2x2(vec2 a, vec2 b)
{
return mat2(a, b);
}
mat2 MAT2x2(float a1, float a2, float b1, float b2)
{
return mat2(vec2(a1, a2), vec2(b1, b2));
}
mat2 MAT2x2(float f)
{
return mat2(f);
}
mat2 MAT2x2(mat3 m)
{
return mat2(m[0].xy, m[1].xy);
}
mat2 MAT2x2(mat4 m)
{
return mat2(m[0].xy, m[1].xy);
}
mat3 MAT3x3(vec3 a, vec3 b, vec3 c)
{
return mat3(a, b, c);
}
mat3 MAT3x3(
float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3)
{
return mat3(vec3(a1, a2, a3), vec3(b1, b2, b3), vec3(c1, c2, c3));
}
mat3 MAT3x3(vec3 a, float b1, float b2, float b3, float c1, float c2, float c3)
{
return mat3(a, vec3(b1, b2, b3), vec3(c1, c2, c3));
}
mat3 MAT3x3(float f)
{
return mat3(f);
}
mat3 MAT3x3(mat4 m)
{
return mat3(m[0].xyz, m[1].xyz, m[2].xyz);
}
mat3 MAT3x3(mat3x4 m)
{
return mat3(m[0].xyz, m[1].xyz, m[2].xyz);
}
mat3 MAT3x3(mat2 m)
{
return mat3(vec3(m[0].xy, 0.0), vec3(m[1].xy, 0.0), vec3(0.0, 0.0, 1.0));
}
mat4 MAT4x4(vec4 a, vec4 b, vec4 c, vec4 d)
{
return mat4(a, b, c, d);
}
mat4 MAT4x4(float a1,
float a2,
float a3,
float a4,
float b1,
float b2,
float b3,
float b4,
float c1,
float c2,
float c3,
float c4,
float d1,
float d2,
float d3,
float d4)
{
return mat4(
vec4(a1, a2, a3, a4), vec4(b1, b2, b3, b4), vec4(c1, c2, c3, c4), vec4(d1, d2, d3, d4));
}
mat4 MAT4x4(float f)
{
return mat4(f);
}
mat4 MAT4x4(mat3 m)
{
return mat4(
vec4(m[0].xyz, 0.0), vec4(m[1].xyz, 0.0), vec4(m[2].xyz, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
}
mat4 MAT4x4(mat3x4 m)
{
return mat4(m[0], m[1], m[2], vec4(0.0, 0.0, 0.0, 1.0));
}
mat4 MAT4x4(mat4x3 m)
{
return mat4(m[0][0],
m[0][1],
m[0][2],
0.0,
m[1][0],
m[1][1],
m[1][2],
0.0,
m[2][0],
m[2][1],
m[2][2],
0.0,
m[3][0],
m[3][1],
m[3][2],
0.0);
}
mat4 MAT4x4(mat2 m)
{
return mat4(vec4(m[0].xy, 0.0, 0.0),
vec4(m[1].xy, 0.0, 0.0),
vec4(0.0, 0.0, 1.0, 0.0),
vec4(0.0, 0.0, 0.0, 1.0));
}
mat3x4 MAT3x4(vec4 a, vec4 b, vec4 c)
{
return mat3x4(a, b, c);
}
mat3x4 MAT3x4(float a1,
float a2,
float a3,
float a4,
float b1,
float b2,
float b3,
float b4,
float c1,
float c2,
float c3,
float c4)
{
return mat3x4(vec4(a1, a2, a3, a4), vec4(b1, b2, b3, b4), vec4(c1, c2, c3, c4));
}
mat3x4 MAT3x4(float f)
{
return mat3x4(f);
}
mat3x4 MAT3x4(mat3 m)
{
return mat3x4(vec4(m[0].xyz, 0.0), vec4(m[1].xyz, 0.0), vec4(m[2].xyz, 0.0));
}
mat3x4 MAT3x4(mat2 m)
{
return mat3x4(vec4(m[0].xy, 0.0, 0.0), vec4(m[1].xy, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0));
}
#define MAT2 MAT2x2
#define MAT3 MAT3x3
#define MAT4 MAT4x4
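/* Illustrative note (assumption about the preprocessing step): a functional-style constructor
 * such as mat4(m) in the shared GLSL source is presumably rewritten to MAT4(m) before
 * compilation, which then resolves to the MAT4x4(mat3) overload above instead of clashing with
 * the mat4 type alias. */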
#undef ENABLE_IF
/* Array syntax compatibility. */
/* clang-format off */
@@ -2550,6 +1162,8 @@ mat3x4 MAT3x4(mat2 m)
#define bool2_array(...) { __VA_ARGS__ }
#define bool3_array(...) { __VA_ARGS__ }
#define bool4_array(...) { __VA_ARGS__ }
#define ARRAY_T(type)
#define ARRAY_V(...) {__VA_ARGS__}
/* clang-format on */
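/* Illustrative expansions (derived from the defines above and their GLSL counterparts):
 * bool2_array(true, false) becomes the MSL aggregate initializer {true, false} here, while the
 * GLSL backend expands the same call to bool2[](true, false). Likewise
 * ARRAY_T(float) ARRAY_V(1.0, 2.0) becomes {1.0, 2.0} in MSL and float[](1.0, 2.0) in GLSL. */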
#define SHADER_LIBRARY_CREATE_INFO(a)

View File

@@ -6,6 +6,26 @@
/** IMPORTANT: Be wary of size and alignment matching for types that are present
* in C++ shared code. */
/* Matrix reshaping functions. Needs to be declared before matrix type aliases. */
#define RESHAPE(name, mat_to, mat_from) \
mat_to to_##name(mat_from m) \
{ \
return mat_to(m); \
}
/* clang-format off */
RESHAPE(float2x2, mat2x2, mat3x3)
RESHAPE(float2x2, mat2x2, mat4x4)
RESHAPE(float3x3, mat3x3, mat4x4)
RESHAPE(float3x3, mat3x3, mat2x2)
RESHAPE(float4x4, mat4x4, mat2x2)
RESHAPE(float4x4, mat4x4, mat3x3)
/* clang-format on */
/* TODO(fclem): Remove. Use Transform instead. */
RESHAPE(float3x3, mat3x3, mat3x4)
#undef RESHAPE
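/* For illustration: on the GLSL side reshaping is a plain constructor call, e.g. the first
 * RESHAPE line above expands to
 *   mat2x2 to_float2x2(mat3x3 m) { return mat2x2(m); }
 * matching the to_float2x2() overload provided for MSL. */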
/* Boolean in GLSL are 32bit in interface structs. */
#define bool32_t bool
#define bool2 bvec2
@@ -121,8 +141,19 @@ bool is_zero(vec4 A)
#define bool2_array bool2[]
#define bool3_array bool3[]
#define bool4_array bool4[]
#define ARRAY_T(type) type[]
#define ARRAY_V
#define SHADER_LIBRARY_CREATE_INFO(a)
#define VERTEX_SHADER_CREATE_INFO(a)
#define FRAGMENT_SHADER_CREATE_INFO(a)
#define COMPUTE_SHADER_CREATE_INFO(a)
#define _in_sta
#define _in_end
#define _out_sta
#define _out_end
#define _inout_sta
#define _inout_end
#define _shared_sta
#define _shared_end

View File

@@ -70,22 +70,22 @@ void main()
EulerXYZ euler = EulerXYZ(1, 2, 3);
Quaternion quat = to_quaternion(euler);
AxisAngle axis_angle = to_axis_angle(euler);
m = mat4(from_rotation(euler));
m = to_float4x4(from_rotation(euler));
EXPECT_NEAR(m, expect, 1e-5);
m = mat4(from_rotation(quat));
m = to_float4x4(from_rotation(quat));
EXPECT_NEAR(m, expect, 1e-5);
m = mat4(from_rotation(axis_angle));
m = to_float4x4(from_rotation(axis_angle));
EXPECT_NEAR(m, expect, 3e-4); /* Has some precision issues on some platforms. */
m = from_scale(vec4(1, 2, 3, 4));
expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 3, 0), vec4(0, 0, 0, 4));
EXPECT_TRUE(is_equal(m, expect, 0.00001));
m = mat4(from_scale(vec3(1, 2, 3)));
m = to_float4x4(from_scale(vec3(1, 2, 3)));
expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 3, 0), vec4(0, 0, 0, 1));
EXPECT_TRUE(is_equal(m, expect, 0.00001));
m = mat4(from_scale(vec2(1, 2)));
m = to_float4x4(from_scale(vec2(1, 2)));
expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 1, 0), vec4(0, 0, 0, 1));
EXPECT_TRUE(is_equal(m, expect, 0.00001));
@@ -182,8 +182,8 @@ void main()
EulerXYZ eul;
Quaternion qt;
vec3 scale;
to_rot_scale(mat3x3(m), eul, scale);
to_rot_scale(mat3x3(m), qt, scale);
to_rot_scale(to_float3x3(m), eul, scale);
to_rot_scale(to_float3x3(m), qt, scale);
EXPECT_NEAR(scale, expect_scale, 0.00001);
EXPECT_NEAR(as_vec4(qt), as_vec4(expect_qt), 0.0002);
EXPECT_NEAR(as_vec3(eul), as_vec3(expect_eul), 0.0002);

View File

@@ -236,9 +236,6 @@ void VKDevice::init_glsl_patch()
ss << "#define gpu_BaryCoordNoPersp gl_BaryCoordNoPerspEXT\n";
}
ss << "#define DFDX_SIGN 1.0\n";
ss << "#define DFDY_SIGN 1.0\n";
/* GLSL Backend Lib. */
ss << datatoc_glsl_shader_defines_glsl;
glsl_patch_ = ss.str();

View File

@@ -1009,7 +1009,7 @@ static PyObject *pygpu_shader_create_from_info(BPyGPUShader * /*self*/, BPyGPUSh
return nullptr;
}
GPUShader *shader = GPU_shader_create_from_info(o->info);
GPUShader *shader = GPU_shader_create_from_info_python(o->info);
if (!shader) {
PyErr_SetString(PyExc_Exception, "Shader Compile Error, see console for more details");
return nullptr;