Metal: Simplify MSL translation
Move most of the string preprocessing used for MSL compatibility to `glsl_preprocess`. Enforce some changes, such as the matrix constructor and array constructor syntax, in the GLSL codebase for C++ compatibility. Additionally, reduce the amount of code duplication inside the compatibility code.

Pull Request: https://projects.blender.org/blender/blender/pulls/128634
Committed by: Clément Foucault
Parent: 9e604f0e00
Commit: 9c0321ae9b
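In a nutshell, the commit rewrites the two GLSL constructs that have no direct C++ equivalent. The before/after pairs below are taken from the hunks that follow; the `float4_array` and `to_float3x3` helpers are assumed to be provided by the GPU module's compatibility headers, which are not part of this diff:

    /* Array constructors: `type[N](...)` becomes `typeN_array(...)`. */
    vec4 inputs[FILTER_ORDER + 1] = vec4[](...);        /* before */
    vec4 inputs[FILTER_ORDER + 1] = float4_array(...);  /* after */

    /* Matrix reshaping constructors: `matN(m)` becomes `to_floatNxM(m)`. */
    mat3 imat = mat3(ModelMatrixInverse);               /* before */
    mat3 imat = to_float3x3(ModelMatrixInverse);        /* after */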
@@ -35,7 +35,7 @@ void main()
    * boundary condition, so we initialize all inputs by the boundary pixel. */
   ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
   vec4 input_boundary = texture_load(input_tx, boundary_texel);
-  vec4 inputs[FILTER_ORDER + 1] = vec4[](
+  vec4 inputs[FILTER_ORDER + 1] = float4_array(
       input_boundary, input_boundary, input_boundary, input_boundary, input_boundary);
 
   /* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
@@ -44,7 +44,7 @@ void main()
    * boundary coefficient. See the DericheGaussianCoefficients class for more information on the
    * boundary handing. */
   vec4 output_boundary = input_boundary * boundary_coefficient;
-  vec4 outputs[FILTER_ORDER + 1] = vec4[](
+  vec4 outputs[FILTER_ORDER + 1] = float4_array(
       output_boundary, output_boundary, output_boundary, output_boundary, output_boundary);
 
   for (int x = 0; x < width; x++) {
@@ -16,8 +16,9 @@ void main()
   int radius = max(0, size);
 #endif
 
-  vec4 mean_of_squared_color_of_quadrants[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
-  vec4 mean_of_color_of_quadrants[4] = vec4[](vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
+  vec4 mean_of_squared_color_of_quadrants[4] = float4_array(
+      vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
+  vec4 mean_of_color_of_quadrants[4] = float4_array(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
 
   /* Compute the above statistics for each of the quadrants around the current pixel. */
   for (int q = 0; q < 4; q++) {
@@ -11,7 +11,7 @@ void main()
 
   vec2 coordinates = (vec2(texel) + vec2(0.5)) / output_size;
 
-  vec3 transformed_coordinates = mat3(homography_matrix) * vec3(coordinates, 1.0);
+  vec3 transformed_coordinates = to_float3x3(homography_matrix) * vec3(coordinates, 1.0);
   vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z;
 
   /* The derivatives of the projected coordinates with respect to x and y are the first and
@@ -8,7 +8,7 @@ void main()
 
   vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(imageSize(mask_img));
 
-  vec3 transformed_coordinates = mat3(homography_matrix) * vec3(coordinates, 1.0);
+  vec3 transformed_coordinates = to_float3x3(homography_matrix) * vec3(coordinates, 1.0);
   vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z;
 
   bool is_inside_plane = all(greaterThanEqual(projected_coordinates, vec2(0.0))) &&
@@ -13,7 +13,7 @@ void main()
 
   vec4 accumulated_color = vec4(0.0);
   for (int i = 0; i < number_of_motion_blur_samples; i++) {
-    mat3 homography_matrix = mat3(homography_matrices[i]);
+    mat3 homography_matrix = to_float3x3(homography_matrices[i]);
 
     vec3 transformed_coordinates = homography_matrix * vec3(coordinates, 1.0);
     vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z;
@@ -10,7 +10,7 @@ void main()
 
   float accumulated_mask = 0.0;
   for (int i = 0; i < number_of_motion_blur_samples; i++) {
-    mat3 homography_matrix = mat3(homography_matrices[i]);
+    mat3 homography_matrix = to_float3x3(homography_matrices[i]);
 
     vec3 transformed_coordinates = homography_matrix * vec3(coordinates, 1.0);
     vec2 projected_coordinates = transformed_coordinates.xy / transformed_coordinates.z;
@@ -15,7 +15,7 @@ void main()
   /* Transform the input image by transforming the domain coordinates with the inverse of input
    * image's transformation. The inverse transformation is an affine matrix and thus the
    * coordinates should be in homogeneous coordinates. */
-  coordinates = (mat3(inverse_transformation) * vec3(coordinates, 1.0)).xy;
+  coordinates = (to_float3x3(inverse_transformation) * vec3(coordinates, 1.0)).xy;
 
   /* Subtract the offset and divide by the input image size to get the relevant coordinates into
    * the sampler's expected [0, 1] range. */
@@ -56,7 +56,7 @@ void main()
    * boundary condition, so we initialize all inputs by the boundary pixel. */
   ivec2 boundary_texel = is_causal ? ivec2(0, y) : ivec2(width - 1, y);
   vec4 input_boundary = texture_load(input_tx, boundary_texel);
-  vec4 inputs[FILTER_ORDER + 1] = vec4[](input_boundary, input_boundary, input_boundary);
+  vec4 inputs[FILTER_ORDER + 1] = float4_array(input_boundary, input_boundary, input_boundary);
 
   /* Create an array that holds the last FILTER_ORDER outputs along with the current output. The
    * current output is at index 0 and the oldest output is at index FILTER_ORDER. We assume Neumann
@@ -64,7 +64,7 @@ void main()
    * boundary coefficient. See the VanVlietGaussianCoefficients class for more information on the
    * boundary handing. */
   vec4 output_boundary = input_boundary * boundary_coefficient;
-  vec4 outputs[FILTER_ORDER + 1] = vec4[](output_boundary, output_boundary, output_boundary);
+  vec4 outputs[FILTER_ORDER + 1] = float4_array(output_boundary, output_boundary, output_boundary);
 
   for (int x = 0; x < width; x++) {
     /* Run forward across rows for the causal filter and backward for the non causal filter. */
@@ -57,8 +57,8 @@ const float dof_layer_offset = 0.5 + 0.5;
 const int dof_max_slight_focus_radius = DOF_MAX_SLIGHT_FOCUS_RADIUS;
 
-const uvec2 quad_offsets_u[4] = uvec2[4](uvec2(0, 1), uvec2(1, 1), uvec2(1, 0), uvec2(0, 0));
-const vec2 quad_offsets[4] = vec2[4](
+const uvec2 quad_offsets_u[4] = uint2_array(uvec2(0, 1), uvec2(1, 1), uvec2(1, 0), uvec2(0, 0));
+const vec2 quad_offsets[4] = float2_array(
     vec2(-0.5, 0.5), vec2(0.5, 0.5), vec2(0.5, -0.5), vec2(-0.5, -0.5));
 
 /** \} */
@@ -80,7 +80,7 @@ vec3 dof_neighborhood_clamp(vec2 frag_coord, vec3 color, float center_coc, float
 {
   /* Stabilize color by clamping with the stable half res neighborhood. */
   vec3 neighbor_min, neighbor_max;
-  const vec2 corners[4] = vec2[4](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));
+  const vec2 corners[4] = float2_array(vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));
   for (int i = 0; i < 4; i++) {
     /**
      * Visit the 4 half-res texels around (and containing) the full-resolution texel.
@@ -125,7 +125,7 @@ float dof_bilateral_weight(float reference_coc, float sample_coc)
 DofSample dof_spatial_filtering()
 {
   /* Plus (+) shape offsets. */
-  const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+  const ivec2 plus_offsets[4] = int2_array(ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
   DofSample center = dof_fetch_input_sample(ivec2(0));
   DofSample accum = DofSample(vec4(0.0), 0.0);
   float accum_weight = 0.0;
@@ -165,7 +165,7 @@ struct DofNeighborhoodMinMax {
 DofNeighborhoodMinMax dof_neighbor_boundbox()
 {
   /* Plus (+) shape offsets. */
-  const ivec2 plus_offsets[4] = ivec2[4](ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
+  const ivec2 plus_offsets[4] = int2_array(ivec2(-1, 0), ivec2(0, -1), ivec2(1, 0), ivec2(0, 1));
   /**
    * Simple bounding box calculation in YCoCg as described in:
    * "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014
@@ -183,7 +183,7 @@ DofNeighborhoodMinMax dof_neighbor_boundbox()
    * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */
   DofSample min_c_3x3 = min_c;
   DofSample max_c_3x3 = max_c;
-  const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
+  const ivec2 corners[4] = int2_array(ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
   for (int i = 0; i < 4; i++) {
     DofSample samp = dof_fetch_input_sample(corners[i]);
     min_c_3x3.color = min(min_c_3x3.color, samp.color);
@@ -206,7 +206,7 @@ vec2 dof_pixel_history_motion_vector(ivec2 texel_sample)
    * Dilate velocity by using the nearest pixel in a cross pattern.
    * "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014 (Slide 27)
    */
-  const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
+  const ivec2 corners[4] = int2_array(ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
   float min_depth = dof_fetch_half_depth(ivec2(0));
   ivec2 nearest_texel = ivec2(0);
   for (int i = 0; i < 4; i++) {
@@ -9,13 +9,13 @@ void main()
 {
   /* Constant array moved inside function scope.
    * Minimizes local register allocation in MSL. */
-  const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0),
-                              vec2(1.0, -1.0),
-                              vec2(-1.0, 1.0),
+  const vec2 pos[6] = float2_array(vec2(-1.0, -1.0),
+                                   vec2(1.0, -1.0),
+                                   vec2(-1.0, 1.0),
 
-                              vec2(1.0, -1.0),
-                              vec2(1.0, 1.0),
-                              vec2(-1.0, 1.0));
+                                   vec2(1.0, -1.0),
+                                   vec2(1.0, 1.0),
+                                   vec2(-1.0, 1.0));
 
   vec2 lP = pos[gl_VertexID % 6];
   int display_index = gl_VertexID / 6;
@@ -9,13 +9,13 @@ void main()
 {
   /* Constant array moved inside function scope.
    * Minimizes local register allocation in MSL. */
-  const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0),
-                              vec2(1.0, -1.0),
-                              vec2(-1.0, 1.0),
+  const vec2 pos[6] = float2_array(vec2(-1.0, -1.0),
+                                   vec2(1.0, -1.0),
+                                   vec2(-1.0, 1.0),
 
-                              vec2(1.0, -1.0),
-                              vec2(1.0, 1.0),
-                              vec2(-1.0, 1.0));
+                                   vec2(1.0, -1.0),
+                                   vec2(1.0, 1.0),
+                                   vec2(-1.0, 1.0));
 
   lP = pos[gl_VertexID % 6];
   int display_index = gl_VertexID / 6;
@@ -9,13 +9,13 @@ void main()
 {
   /* Constant array moved inside function scope.
    * Minimizes local register allocation in MSL. */
-  const vec2 pos[6] = vec2[6](vec2(-1.0, -1.0),
-                              vec2(1.0, -1.0),
-                              vec2(-1.0, 1.0),
+  const vec2 pos[6] = float2_array(vec2(-1.0, -1.0),
+                                   vec2(1.0, -1.0),
+                                   vec2(-1.0, 1.0),
 
-                              vec2(1.0, -1.0),
-                              vec2(1.0, 1.0),
-                              vec2(-1.0, 1.0));
+                                   vec2(1.0, -1.0),
+                                   vec2(1.0, 1.0),
+                                   vec2(-1.0, 1.0));
 
   lP = pos[gl_VertexID % 6];
   int cell_index = gl_VertexID / 6;
@@ -195,7 +195,7 @@ void film_cryptomatte_layer_accum_and_store(
     return;
   }
   /* x = hash, y = accumulated weight. Only keep track of 4 highest weighted samples. */
-  vec2 crypto_samples[4] = vec2[4](vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0));
+  vec2 crypto_samples[4] = float2_array(vec2(0.0), vec2(0.0), vec2(0.0), vec2(0.0));
   for (int i = 0; i < samples_len; i++) {
     FilmSample src = film_sample_get(i, texel_film);
     film_sample_cryptomatte_accum(src, layer_component, cryptomatte_tx, crypto_samples);
@@ -246,7 +246,7 @@ vec2 film_pixel_history_motion_vector(ivec2 texel_sample)
    * Dilate velocity by using the nearest pixel in a cross pattern.
    * "High Quality Temporal Supersampling" by Brian Karis at SIGGRAPH 2014 (Slide 27)
    */
-  const ivec2 corners[4] = ivec2[4](ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
+  const ivec2 corners[4] = int2_array(ivec2(-2, -2), ivec2(2, -2), ivec2(-2, 2), ivec2(2, 2));
   float min_depth = texelFetch(depth_tx, texel_sample, 0).x;
   ivec2 nearest_texel = texel_sample;
   for (int i = 0; i < 4; i++) {
@@ -342,11 +342,11 @@ vec4 film_sample_catmull_rom(sampler2D color_tx, vec2 input_texel)
 void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c)
 {
   /* Plus (+) shape offsets. */
-  const ivec2 plus_offsets[5] = ivec2[5](ivec2(0, 0), /* Center */
-                                         ivec2(-1, 0),
-                                         ivec2(0, -1),
-                                         ivec2(1, 0),
-                                         ivec2(0, 1));
+  const ivec2 plus_offsets[5] = int2_array(ivec2(0, 0), /* Center */
+                                           ivec2(-1, 0),
+                                           ivec2(0, -1),
+                                           ivec2(1, 0),
+                                           ivec2(0, 1));
 #if 0
   /**
    * Compute Variance of neighborhood as described in:
@@ -389,7 +389,7 @@ void film_combined_neighbor_boundbox(ivec2 texel, out vec4 min_c, out vec4 max_c
    * Round bbox shape by averaging 2 different min/max from 2 different neighborhood. */
   vec4 min_c_3x3 = min_c;
   vec4 max_c_3x3 = max_c;
-  const ivec2 corners[4] = ivec2[4](ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
+  const ivec2 corners[4] = int2_array(ivec2(-1, -1), ivec2(1, -1), ivec2(-1, 1), ivec2(1, 1));
   for (int i = 0; i < 4; i++) {
     vec4 color = film_texelfetch_as_YCoCg_opacity(combined_tx, texel + corners[i]);
     min_c_3x3 = min(min_c_3x3, color);
@@ -220,22 +220,22 @@ void cubeface_sync(int tilemap_id,
   /* Use switch instead of inline array of float3x3. */
   switch (cubeface) {
     case Z_NEG:
-      viewmat = mat4x4(mat3x3(+1, +0, +0, +0, +1, +0, +0, +0, +1)) * viewmat;
+      viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, +1, +0, +0, +0, +1)) * viewmat;
       break;
     case X_POS:
-      viewmat = mat4x4(mat3x3(+0, +0, -1, -1, +0, +0, +0, +1, +0)) * viewmat;
+      viewmat = to_float4x4(mat3x3(+0, +0, -1, -1, +0, +0, +0, +1, +0)) * viewmat;
       break;
     case X_NEG:
-      viewmat = mat4x4(mat3x3(+0, +0, +1, +1, +0, +0, +0, +1, +0)) * viewmat;
+      viewmat = to_float4x4(mat3x3(+0, +0, +1, +1, +0, +0, +0, +1, +0)) * viewmat;
       break;
     case Y_POS:
-      viewmat = mat4x4(mat3x3(+1, +0, +0, +0, +0, -1, +0, +1, +0)) * viewmat;
+      viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, +0, -1, +0, +1, +0)) * viewmat;
       break;
     case Y_NEG:
-      viewmat = mat4x4(mat3x3(-1, +0, +0, +0, +0, +1, +0, +1, +0)) * viewmat;
+      viewmat = to_float4x4(mat3x3(-1, +0, +0, +0, +0, +1, +0, +1, +0)) * viewmat;
       break;
     case Z_POS:
-      viewmat = mat4x4(mat3x3(+1, +0, +0, +0, -1, +0, +0, +0, -1)) * viewmat;
+      viewmat = to_float4x4(mat3x3(+1, +0, +0, +0, -1, +0, +0, +0, -1)) * viewmat;
       break;
   }
 
@@ -289,7 +289,7 @@ void main()
      * set to 0 only when the light radius is also 0 to detect this case. */
   }
   else {
-    light.object_to_world = transform_from_matrix(mat4x4(from_up_axis(shadow_direction)));
+    light.object_to_world = transform_from_matrix(to_float4x4(from_up_axis(shadow_direction)));
   }
 }
 
@@ -49,7 +49,7 @@ vec3 lightprobe_sphere_parallax(SphereProbeData probe, vec3 P, vec3 L)
   }
   /* Correct reflection ray using parallax volume intersection. */
   vec3 lP = vec4(P, 1.0) * probe.world_to_probe_transposed;
-  vec3 lL = (mat3x3(probe.world_to_probe_transposed) * L) / probe.parallax_distance;
+  vec3 lL = (to_float3x3(probe.world_to_probe_transposed) * L) / probe.parallax_distance;
 
   float dist = (probe.parallax_shape == SHAPE_ELIPSOID) ? line_unit_sphere_intersect_dist(lP, lL) :
                                                           line_unit_box_intersect_dist(lP, lL);
@@ -170,7 +170,7 @@ SphericalHarmonicL1 lightprobe_volume_sample(
 
   VolumeProbeData grid_data = grids_infos_buf[index];
 
-  mat3x3 world_to_grid_transposed = mat3x3(grid_data.world_to_grid_transposed);
+  mat3x3 world_to_grid_transposed = to_float3x3(grid_data.world_to_grid_transposed);
   vec3 lNg = safe_normalize(Ng * world_to_grid_transposed);
   vec3 lV = safe_normalize(V * world_to_grid_transposed);
 
@@ -104,7 +104,8 @@ void main()
   }
 
   /* Rotate Spherical Harmonic into world space. */
-  mat3 grid_to_world_rot = normalize(mat3(grids_infos_buf[grid_index].world_to_grid_transposed));
+  mat3 grid_to_world_rot = normalize(
+      to_float3x3(grids_infos_buf[grid_index].world_to_grid_transposed));
   sh_local = spherical_harmonics_rotate(grid_to_world_rot, sh_local);
 
   SphericalHarmonicL1 sh_visibility;
@@ -36,7 +36,7 @@ void main()
   sp = bone_mat * sp.xzy + headSphere.xyz;
   nor = bone_mat * nor.xzy;
 
-  normalView = mat3(drw_view.viewmat) * nor;
+  normalView = to_float3x3(drw_view.viewmat) * nor;
 
   finalStateColor = stateColor;
   finalBoneColor = boneColor;
@@ -34,7 +34,7 @@ VertOut vertex_main(VertIn v_in)
 
   /* This is slow and run per vertex, but it's still faster than
    * doing it per instance on CPU and sending it on via instance attribute. */
-  mat3 normal_mat = transpose(inverse(mat3(model_mat)));
+  mat3 normal_mat = transpose(inverse(to_float3x3(model_mat)));
 
   VertOut v_out;
   v_out.vPos = view_pos.xyz;
@@ -39,7 +39,7 @@ void do_vertex_shader(mat4 in_inst_obmat,
 
   /* This is slow and run per vertex, but it's still faster than
    * doing it per instance on CPU and sending it on via instance attribute. */
-  mat3 normal_mat = transpose(inverse(mat3(model_mat)));
+  mat3 normal_mat = transpose(inverse(to_float3x3(model_mat)));
 
   out_ssPos = proj(out_pPos);
 
@@ -15,7 +15,7 @@ void main()
 
   /* This is slow and run per vertex, but it's still faster than
    * doing it per instance on CPU and sending it on via instance attribute. */
-  mat3 normal_mat = transpose(inverse(mat3(model_mat)));
+  mat3 normal_mat = transpose(inverse(to_float3x3(model_mat)));
   vec3 normal = normalize(normal_world_to_view(normal_mat * nor));
 
   inverted = int(dot(cross(model_mat[0].xyz, model_mat[1].xyz), model_mat[2].xyz) < 0.0);
@@ -27,7 +27,7 @@ void main()
    * In perspective mode it's also the view-space position
    * of the sphere center. */
   vec3 cam_ray = (is_persp) ? model_view_matrix[3].xyz : vec3(0.0, 0.0, -1.0);
-  cam_ray = mat3(sphereMatrix) * cam_ray;
+  cam_ray = to_float3x3(sphereMatrix) * cam_ray;
 
   /* Sphere center distance from the camera (persp) in local space. */
   float cam_dist = length(cam_ray);
@@ -25,7 +25,7 @@ void main()
    * In perspective mode it's also the view-space position
    * of the sphere center. */
   vec3 cam_ray = (is_persp) ? model_view_matrix[3].xyz : vec3(0.0, 0.0, -1.0);
-  cam_ray = mat3(sphereMatrix) * cam_ray;
+  cam_ray = to_float3x3(sphereMatrix) * cam_ray;
 
   /* Sphere center distance from the camera (persp) in local space. */
   float cam_dist = length(cam_ray);
@@ -11,10 +11,10 @@ float dither(void)
 {
   /* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
    * memory pressure. */
-  const vec4 dither_mat4x4[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
-                                        vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
-                                        vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
-                                        vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
+  const vec4 dither_mat4x4[4] = float4_array(vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
+                                             vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
+                                             vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
+                                             vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
 
   ivec2 co = ivec2(gl_FragCoord.xy) % 4;
   return dither_mat4x4[co.x][co.y];
@@ -8,7 +8,7 @@
 
 void main()
 {
-  mat3 imat = mat3(ModelMatrixInverse);
+  mat3 imat = to_float3x3(ModelMatrixInverse);
   vec3 right = normalize(imat * ViewMatrixInverse[0].xyz);
   vec3 up = normalize(imat * ViewMatrixInverse[1].xyz);
 #ifdef VERTEX_PULL
@@ -61,7 +61,7 @@ void main()
     thick_time = thickness * (thick_time * 2.0 - 1.0);
     /* Take object scale into account.
      * NOTE: This only works fine with uniform scaling. */
-    float scale = 1.0 / length(mat3(ModelMatrixInverse) * binor);
+    float scale = 1.0 / length(to_float3x3(ModelMatrixInverse) * binor);
     world_pos = center_wpos + binor * thick_time * scale;
   }
   else {
@@ -80,7 +80,7 @@ void main()
   /* Weights are available */
   else {
     float weight = weight_interp.x;
-    vec4 weight_color = texture(colorramp, weight, 0);
+    vec4 weight_color = texture(colorramp, weight);
     weight_color = apply_color_fac(weight_color);
 
     /* Contour display */
@@ -87,14 +87,14 @@ void main()
 #endif
   /* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
    * memory pressure. */
-  const int indices[8] = int[8](0, 1, 1, 2, 2, 3, 3, 0);
+  const int indices[8] = int_array(0, 1, 1, 2, 2, 3, 3, 0);
 
   /* Corners for cell outlines. 0.45 is arbitrary. Any value below 0.5 can be used to avoid
    * overlapping of the outlines. */
-  const vec3 corners[4] = vec3[4](vec3(-0.45, 0.45, 0.0),
-                                  vec3(0.45, 0.45, 0.0),
-                                  vec3(0.45, -0.45, 0.0),
-                                  vec3(-0.45, -0.45, 0.0));
+  const vec3 corners[4] = float3_array(vec3(-0.45, 0.45, 0.0),
+                                       vec3(0.45, 0.45, 0.0),
+                                       vec3(0.45, -0.45, 0.0),
+                                       vec3(-0.45, -0.45, 0.0));
 
   vec3 pos = domainOriginOffset + cellSize * (vec3(cell_co + adaptiveCellOffset) + cell_offset);
   vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 8]];
@@ -178,12 +178,12 @@ void main()
 # ifdef USE_NEEDLE
   /* NOTE(Metal): Declaring constant arrays in function scope to avoid increasing local shader
    * memory pressure. */
-  const vec3 corners[4] = vec3[4](vec3(0.0, 0.2, -0.5),
-                                  vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
-                                  vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
-                                  vec3(0.0, 0.0, 0.5));
+  const vec3 corners[4] = float3_array(vec3(0.0, 0.2, -0.5),
+                                       vec3(-0.2 * 0.866, -0.2 * 0.5, -0.5),
+                                       vec3(0.2 * 0.866, -0.2 * 0.5, -0.5),
+                                       vec3(0.0, 0.0, 0.5));
 
-  const int indices[12] = int[12](0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
+  const int indices[12] = int_array(0, 1, 1, 2, 2, 0, 0, 3, 1, 3, 2, 3);
 
   vec3 rotated_pos = rot_mat * corners[indices[gl_VertexID % 12]];
   pos += rotated_pos * vector_length * displaySize * cellSize;
@@ -100,7 +100,7 @@ void main()
   if (isHair) {
     mat4 obmat = hairDupliMatrix;
     wpos = (obmat * vec4(pos, 1.0)).xyz;
-    wnor = -normalize(mat3(obmat) * nor);
+    wnor = -normalize(to_float3x3(obmat) * nor);
   }
 
   bool is_persp = (drw_view.winmat[3][3] == 0.0);
@@ -190,10 +190,10 @@ vec4 volume_integration(vec3 ray_ori, vec3 ray_dir, float ray_inc, float ray_max
 {
   /* NOTE: Constant array declared inside function scope to reduce shader core thread memory
    * pressure on Apple Silicon. */
-  const vec4 dither_mat[4] = vec4[4](vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
-                                     vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
-                                     vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
-                                     vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
+  const vec4 dither_mat[4] = float4_array(vec4(P(0.0), P(8.0), P(2.0), P(10.0)),
+                                          vec4(P(12.0), P(4.0), P(14.0), P(6.0)),
+                                          vec4(P(3.0), P(11.0), P(1.0), P(9.0)),
+                                          vec4(P(15.0), P(7.0), P(13.0), P(5.0)));
   /* Start with full transmittance and no scattered light. */
   vec3 final_scattering = vec3(0.0);
   float final_transmittance = 1.0;
@@ -176,7 +176,7 @@ void drw_debug_sphere(vec3 p, float radius)
  */
 void drw_debug_matrix(mat4 mat, vec4 v_color)
 {
-  vec4 p[4] = vec4[4](vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1));
+  vec4 p[4] = float4_array(vec4(0, 0, 0, 1), vec4(1, 0, 0, 1), vec4(0, 1, 0, 1), vec4(0, 0, 1, 1));
   for (int i = 0; i < 4; i++) {
     p[i] = mat * p[i];
     p[i].xyz /= p[i].w;
@@ -195,14 +195,14 @@ void drw_debug_matrix(mat4 mat)
  */
 void drw_debug_matrix_as_bbox(mat4 mat, vec4 v_color)
 {
-  vec4 p[8] = vec4[8](vec4(-1, -1, -1, 1),
-                      vec4(1, -1, -1, 1),
-                      vec4(1, 1, -1, 1),
-                      vec4(-1, 1, -1, 1),
-                      vec4(-1, -1, 1, 1),
-                      vec4(1, -1, 1, 1),
-                      vec4(1, 1, 1, 1),
-                      vec4(-1, 1, 1, 1));
+  vec4 p[8] = float4_array(vec4(-1, -1, -1, 1),
+                           vec4(1, -1, -1, 1),
+                           vec4(1, 1, -1, 1),
+                           vec4(-1, 1, -1, 1),
+                           vec4(-1, -1, 1, 1),
+                           vec4(1, -1, 1, 1),
+                           vec4(1, 1, 1, 1),
+                           vec4(-1, 1, 1, 1));
   for (int i = 0; i < 8; i++) {
     p[i] = mat * p[i];
     p[i].xyz /= p[i].w;
@@ -124,7 +124,7 @@ void drw_print_value_binary(uint value)
 {
   drw_print_no_endl("0b");
   drw_print_string_start(10u * 4u);
-  uint digits[10] = uint[10](0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
+  uint digits[10] = uint_array(0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u);
   uint digit = 0u;
   for (uint i = 0u; i < 32u; i++) {
     drw_print_append_digit(((value >> i) & 1u), digits[digit / 4u]);
@@ -155,7 +155,7 @@ void drw_print_value_uint(uint value, const bool hex, bool is_negative, const bo
   drw_print_string_start(3u * 4u);
   const uint blank_value = hex ? 0x30303030u : 0x20202020u;
   const uint prefix = hex ? 0x78302020u : 0x20202020u;
-  uint digits[3] = uint[3](blank_value, blank_value, prefix);
+  uint digits[3] = uint_array(blank_value, blank_value, prefix);
   const uint base = hex ? 16u : 10u;
   uint digit = 0u;
   /* Add `u` suffix. */
@@ -247,7 +247,7 @@ void drw_print_value(float val)
    * and exponent (4). */
   const float significant_digits = 6.0;
   drw_print_string_start(3u * 4u);
-  uint digits[3] = uint[3](0x20202020u, 0x20202020u, 0x20202020u);
+  uint digits[3] = uint_array(0x20202020u, 0x20202020u, 0x20202020u);
 
   float exponent = floor(log(abs(val)) / log(10.0));
   bool display_exponent = exponent >= (significant_digits) ||
@@ -210,7 +210,7 @@ void hair_get_center_pos_tan_binor_time(bool is_persp,
 
   mat4 obmat = hairDupliMatrix;
   wpos = (obmat * vec4(wpos, 1.0)).xyz;
-  wtan = -normalize(mat3(obmat) * wtan);
+  wtan = -normalize(to_float3x3(obmat) * wtan);
 
   vec3 camera_vec = (is_persp) ? camera_pos - wpos : camera_z;
   wbinor = normalize(cross(camera_vec, wtan));
@@ -236,7 +236,7 @@ void hair_get_pos_tan_binor_time(bool is_persp,
     thick_time = thickness * (thick_time * 2.0 - 1.0);
     /* Take object scale into account.
      * NOTE: This only works fine with uniform scaling. */
-    float scale = 1.0 / length(mat3(invmodel_mat) * wbinor);
+    float scale = 1.0 / length(to_float3x3(invmodel_mat) * wbinor);
     wpos += wbinor * thick_time * scale;
   }
   else {
@@ -41,7 +41,7 @@ mat3 mul(mat3 m1, mat3 m2)
 # ifndef GPU_SHADER_MATH_MATRIX_LIB_GLSL
 vec3 transform_direction(mat4 m, vec3 v)
 {
-  return mat3(m) * v;
+  return to_float3x3(m) * v;
 }
 vec3 transform_point(mat4 m, vec3 v)
 {
@@ -37,7 +37,7 @@ void pointcloud_get_pos_and_radius(out vec3 outpos, out float outradius)
   int id = pointcloud_get_point_id();
   vec4 pos_rad = texelFetch(ptcloud_pos_rad_tx, id);
   outpos = point_object_to_world(pos_rad.xyz);
-  outradius = dot(abs(mat3(ModelMatrix) * pos_rad.www), vec3(1.0 / 3.0));
+  outradius = dot(abs(to_float3x3(ModelMatrix) * pos_rad.www), vec3(1.0 / 3.0));
 }
 
 /* Return world position and normal. */
@@ -227,14 +227,14 @@ uniform mat4 ModelMatrixInverse;
  * NOTE: This is only valid because we are only using the mat3 of the ViewMatrixInverse.
  * ViewMatrix * transpose(ModelMatrixInverse)
  */
-# define NormalMatrix transpose(mat3(ModelMatrixInverse))
-# define NormalMatrixInverse transpose(mat3(ModelMatrix))
+# define NormalMatrix transpose(to_float3x3(ModelMatrixInverse))
+# define NormalMatrixInverse transpose(to_float3x3(ModelMatrix))
 
-# define normal_object_to_view(n) (mat3(ViewMatrix) * (NormalMatrix * n))
+# define normal_object_to_view(n) (to_float3x3(ViewMatrix) * (NormalMatrix * n))
 # define normal_object_to_world(n) (NormalMatrix * n)
 # define normal_world_to_object(n) (NormalMatrixInverse * n)
-# define normal_world_to_view(n) (mat3(ViewMatrix) * n)
-# define normal_view_to_world(n) (mat3(ViewMatrixInverse) * n)
+# define normal_world_to_view(n) (to_float3x3(ViewMatrix) * n)
+# define normal_view_to_world(n) (to_float3x3(ViewMatrixInverse) * n)
 
 # define point_object_to_ndc(p) \
     (ProjectionMatrix * (ViewMatrix * vec4((ModelMatrix * vec4(p, 1.0)).xyz, 1.0)))
@@ -9,102 +9,102 @@
 bool char_intersect(uvec2 bitmap_position)
 {
   /* Using 8x8 = 64bits = uvec2. */
-  uvec2 ascii_bitmap[96] = uvec2[96](uvec2(0x00000000u, 0x00000000u),
+  uvec2 ascii_bitmap[96] = uint2_array(uvec2(0x00000000u, 0x00000000u),
                                        uvec2(0x18001800u, 0x183c3c18u),
                                        uvec2(0x00000000u, 0x36360000u),
                                        uvec2(0x7f363600u, 0x36367f36u),
                                        uvec2(0x301f0c00u, 0x0c3e031eu),
                                        uvec2(0x0c666300u, 0x00633318u),
                                        uvec2(0x3b336e00u, 0x1c361c6eu),
                                        uvec2(0x00000000u, 0x06060300u),
                                        uvec2(0x060c1800u, 0x180c0606u),
                                        uvec2(0x180c0600u, 0x060c1818u),
                                        uvec2(0x3c660000u, 0x00663cffu),
                                        uvec2(0x0c0c0000u, 0x000c0c3fu),
                                        uvec2(0x000c0c06u, 0x00000000u),
                                        uvec2(0x00000000u, 0x0000003fu),
                                        uvec2(0x000c0c00u, 0x00000000u),
                                        uvec2(0x06030100u, 0x6030180cu),
                                        uvec2(0x6f673e00u, 0x3e63737bu),
                                        uvec2(0x0c0c3f00u, 0x0c0e0c0cu),
                                        uvec2(0x06333f00u, 0x1e33301cu),
                                        uvec2(0x30331e00u, 0x1e33301cu),
                                        uvec2(0x7f307800u, 0x383c3633u),
                                        uvec2(0x30331e00u, 0x3f031f30u),
                                        uvec2(0x33331e00u, 0x1c06031fu),
                                        uvec2(0x0c0c0c00u, 0x3f333018u),
                                        uvec2(0x33331e00u, 0x1e33331eu),
                                        uvec2(0x30180e00u, 0x1e33333eu),
                                        uvec2(0x000c0c00u, 0x000c0c00u),
                                        uvec2(0x000c0c06u, 0x000c0c00u),
                                        uvec2(0x060c1800u, 0x180c0603u),
                                        uvec2(0x003f0000u, 0x00003f00u),
                                        uvec2(0x180c0600u, 0x060c1830u),
                                        uvec2(0x0c000c00u, 0x1e333018u),
                                        uvec2(0x7b031e00u, 0x3e637b7bu),
                                        uvec2(0x3f333300u, 0x0c1e3333u),
                                        uvec2(0x66663f00u, 0x3f66663eu),
                                        uvec2(0x03663c00u, 0x3c660303u),
                                        uvec2(0x66361f00u, 0x1f366666u),
                                        uvec2(0x16467f00u, 0x7f46161eu),
                                        uvec2(0x16060f00u, 0x7f46161eu),
                                        uvec2(0x73667c00u, 0x3c660303u),
                                        uvec2(0x33333300u, 0x3333333fu),
                                        uvec2(0x0c0c1e00u, 0x1e0c0c0cu),
                                        uvec2(0x33331e00u, 0x78303030u),
                                        uvec2(0x36666700u, 0x6766361eu),
                                        uvec2(0x46667f00u, 0x0f060606u),
                                        uvec2(0x6b636300u, 0x63777f7fu),
                                        uvec2(0x73636300u, 0x63676f7bu),
                                        uvec2(0x63361c00u, 0x1c366363u),
                                        uvec2(0x06060f00u, 0x3f66663eu),
                                        uvec2(0x3b1e3800u, 0x1e333333u),
                                        uvec2(0x36666700u, 0x3f66663eu),
                                        uvec2(0x38331e00u, 0x1e33070eu),
                                        uvec2(0x0c0c1e00u, 0x3f2d0c0cu),
                                        uvec2(0x33333f00u, 0x33333333u),
                                        uvec2(0x331e0c00u, 0x33333333u),
                                        uvec2(0x7f776300u, 0x6363636bu),
                                        uvec2(0x1c366300u, 0x6363361cu),
                                        uvec2(0x0c0c1e00u, 0x3333331eu),
                                        uvec2(0x4c667f00u, 0x7f633118u),
                                        uvec2(0x06061e00u, 0x1e060606u),
                                        uvec2(0x30604000u, 0x03060c18u),
                                        uvec2(0x18181e00u, 0x1e181818u),
                                        uvec2(0x00000000u, 0x081c3663u),
                                        uvec2(0x000000ffu, 0x00000000u),
                                        uvec2(0x00000000u, 0x0c0c1800u),
                                        uvec2(0x3e336e00u, 0x00001e30u),
                                        uvec2(0x66663b00u, 0x0706063eu),
                                        uvec2(0x03331e00u, 0x00001e33u),
                                        uvec2(0x33336e00u, 0x3830303eu),
                                        uvec2(0x3f031e00u, 0x00001e33u),
                                        uvec2(0x06060f00u, 0x1c36060fu),
                                        uvec2(0x333e301fu, 0x00006e33u),
                                        uvec2(0x66666700u, 0x0706366eu),
                                        uvec2(0x0c0c1e00u, 0x0c000e0cu),
                                        uvec2(0x3033331eu, 0x30003030u),
                                        uvec2(0x1e366700u, 0x07066636u),
                                        uvec2(0x0c0c1e00u, 0x0e0c0c0cu),
                                        uvec2(0x7f6b6300u, 0x0000337fu),
                                        uvec2(0x33333300u, 0x00001f33u),
                                        uvec2(0x33331e00u, 0x00001e33u),
                                        uvec2(0x663e060fu, 0x00003b66u),
                                        uvec2(0x333e3078u, 0x00006e33u),
                                        uvec2(0x66060f00u, 0x00003b6eu),
                                        uvec2(0x1e301f00u, 0x00003e03u),
                                        uvec2(0x0c2c1800u, 0x080c3e0cu),
                                        uvec2(0x33336e00u, 0x00003333u),
                                        uvec2(0x331e0c00u, 0x00003333u),
                                        uvec2(0x7f7f3600u, 0x0000636bu),
                                        uvec2(0x1c366300u, 0x00006336u),
                                        uvec2(0x333e301fu, 0x00003333u),
                                        uvec2(0x0c263f00u, 0x00003f19u),
                                        uvec2(0x0c0c3800u, 0x380c0c07u),
                                        uvec2(0x18181800u, 0x18181800u),
                                        uvec2(0x0c0c0700u, 0x070c0c38u),
                                        uvec2(0x00000000u, 0x6e3b0000u),
                                        uvec2(0x00000000u, 0x00000000u));
 
   if (any(lessThan(bitmap_position, uvec2(0))) || any(greaterThanEqual(bitmap_position, uvec2(8))))
   {
@@ -53,11 +53,11 @@ mat4x4 drw_modelinv()
  */
 mat3x3 drw_normat()
 {
-  return transpose(mat3x3(drw_modelinv()));
+  return transpose(to_float3x3(drw_modelinv()));
 }
 mat3x3 drw_norinv()
 {
-  return transpose(mat3x3(drw_modelmat()));
+  return transpose(to_float3x3(drw_modelmat()));
 }
 
 /* -------------------------------------------------------------------- */
@@ -77,11 +77,11 @@ vec3 drw_normal_world_to_object(vec3 N)
 
 vec3 drw_normal_object_to_view(vec3 lN)
 {
-  return (mat3x3(drw_view.viewmat) * (drw_normat() * lN));
+  return (to_float3x3(drw_view.viewmat) * (drw_normat() * lN));
 }
 vec3 drw_normal_view_to_object(vec3 vN)
 {
-  return (drw_norinv() * (mat3x3(drw_view.viewinv) * vN));
+  return (drw_norinv() * (to_float3x3(drw_view.viewinv) * vN));
 }
 
 /** \} */
@@ -108,12 +108,12 @@ float drw_ndc_to_screen(float ndc_P)
 
 vec3 drw_normal_view_to_world(vec3 vN)
 {
-  return (mat3x3(drw_view.viewinv) * vN);
+  return (to_float3x3(drw_view.viewinv) * vN);
 }
 
 vec3 drw_normal_world_to_view(vec3 N)
 {
-  return (mat3x3(drw_view.viewmat) * N);
+  return (to_float3x3(drw_view.viewmat) * N);
 }
 
 /** \} */
@@ -41,6 +41,11 @@ struct GPUShader;
  */
 GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info);
 
+/**
+ * Same as GPU_shader_create_from_info but will run preprocessor on source strings.
+ */
+GPUShader *GPU_shader_create_from_info_python(const GPUShaderCreateInfo *_info);
+
 /**
  * Create a shader using a named #GPUShaderCreateInfo registered at startup.
  * These are declared inside `*_info.hh` files using the `GPU_SHADER_CREATE_INFO()` macro.
@@ -7,6 +7,8 @@
 # Build `glsl_preprocess` executable.
 set(SRC
   glsl_preprocess.cc
+
+  glsl_preprocess.hh
 )
 
 # `SRC_DNA_INC` is defined in the parent directory.
@@ -1,4 +1,4 @@
-/* SPDX-FileCopyrightText: 2001-2002 NaN Holding BV. All rights reserved.
+/* SPDX-FileCopyrightText: 2024 Blender Authors
  *
  * SPDX-License-Identifier: GPL-2.0-or-later */
 
@@ -6,11 +6,12 @@
  * \ingroup glsl_preprocess
  */
 
 #include <algorithm>
 #include <fstream>
 #include <iostream>
 #include <regex>
 #include <string>
 #include <vector>
 
+#include "glsl_preprocess.hh"
+
 int main(int argc, char **argv)
 {
@@ -40,8 +41,29 @@ int main(int argc, char **argv)
   bool first_comment = true;
   bool inside_comment = false;
 
+  int error = 0;
+  size_t line_index = 0;
+
+  auto report_error =
+      [&](const std::string &src_line, const std::smatch &match, const char *err_msg) {
+        size_t err_line = line_index;
+        size_t err_char = match.position();
+
+        std::cerr << input_file_name;
+        std::cerr << ':' << std::to_string(err_line) << ':' << std::to_string(err_char);
+        std::cerr << ": error: " << err_msg << std::endl;
+        std::cerr << src_line << std::endl;
+        std::cerr << std::string(err_char, ' ') << '^' << std::endl;
+
+        error++;
+      };
+
+  blender::gpu::shader::Preprocessor processor(report_error);
+
   std::string line;
   while (std::getline(input_file, line)) {
+    line_index++;
+
     /* Remove licence headers (first comment). */
     if (line.rfind("/*", 0) == 0 && first_comment) {
       first_comment = false;
@@ -55,17 +77,17 @@ int main(int argc, char **argv)
     }
 
     if (skip_line) {
-      line = "";
+      output_file << "\n";
     }
-    else if (line.rfind("#include ", 0) == 0 || line.rfind("#pragma once", 0) == 0) {
-      line[0] = line[1] = '/';
+    else {
+      processor << line << '\n';
     }
-
-    output_file << line << "\n";
   }
 
+  output_file << processor.str();
+
   input_file.close();
   output_file.close();
 
-  return 0;
+  return error;
 }
source/blender/gpu/glsl_preprocess/glsl_preprocess.hh (new file, 219 lines)
@@ -0,0 +1,219 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup glsl_preprocess
+ */
+
+#pragma once
+
+#include <algorithm>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace blender::gpu::shader {
+
+/**
+ * Shader source preprocessor that allow to mutate GLSL into cross API source that can be
+ * interpreted by the different GPU backends. Some syntax are mutated or reported as incompatible.
+ *
+ * Implementation speed is not a huge concern as we only apply this at compile time or on python
+ * shaders source.
+ */
+template<typename T, bool no_linting = false> class Preprocessor {
+  T &report_error;
+
+  struct SharedVar {
+    std::string type;
+    std::string name;
+    std::string array;
+  };
+  std::vector<SharedVar> shared_vars_;
+
+  std::stringstream output_;
+
+ public:
+  Preprocessor(T &error_cb) : report_error(error_cb) {}
+
+  Preprocessor &operator<<(std::string str)
+  {
+    threadgroup_variable_parsing(str);
+    matrix_constructor_linting(str);
+    array_constructor_linting(str);
+    str = preprocessor_directive_mutation(str);
+    str = argument_decorator_macro_injection(str);
+    str = array_constructor_macro_injection(str);
+    output_ << str;
+    return *this;
+  }
+
+  Preprocessor &operator<<(char c)
+  {
+    output_ << c;
+    return *this;
+  }
+
+  std::string str()
+  {
+    return output_.str() + suffix();
+  }
+
+ private:
+  std::string preprocessor_directive_mutation(const std::string &str)
+  {
+    /* Example: `#include "deps.glsl"` > `//include "deps.glsl"` */
+    std::regex regex("#\\s*(include|pragma once)");
+    return std::regex_replace(str, regex, "//$1");
+  }
+
+  void threadgroup_variable_parsing(std::string str)
+  {
+    std::regex regex("shared\\s+(\\w+)\\s+(\\w+)([^;]*);");
+    for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
+      shared_vars_.push_back({match[1].str(), match[2].str(), match[3].str()});
+    }
+  }
+
+  std::string argument_decorator_macro_injection(const std::string &str)
+  {
+    /* Example: `out float var[2]` > `out float _out_sta var _out_end[2]` */
+    std::regex regex("(out|inout|in|shared)\\s+(\\w+)\\s+(\\w+)");
+    return std::regex_replace(str, regex, "$1 $2 _$1_sta $3 _$1_end");
+  }
+
+  std::string array_constructor_macro_injection(const std::string &str)
+  {
+    /* Example: `= float[2](0.0, 0.0)` > `= ARRAY_T(float) ARRAY_V(0.0, 0.0)` */
+    std::regex regex("=\\s*(\\w+)\\s*\\[[^\\]]*\\]\\s*\\(");
+    return std::regex_replace(str, regex, "= ARRAY_T($1) ARRAY_V(");
+  }
+
+  /* TODO(fclem): Too many false positive and false negative to be applied to python shaders. */
+  void matrix_constructor_linting(std::string str)
+  {
+    if constexpr (no_linting) {
+      return;
+    }
+    /* Example: `mat4(other_mat)`. */
+    std::regex regex("\\s+(mat(\\d|\\dx\\d)|float\\dx\\d)\\([^,\\s\\d]+\\)");
+    for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
+      /* This only catches some invalid usage. For the rest, the CI will catch them. */
+      const char *msg =
+          "Matrix constructor is not cross API compatible. "
+          "Use to_floatNxM to reshape the matrix or use other constructors instead.";
+      report_error(str, match, msg);
+    }
+  }
+
+  void array_constructor_linting(std::string str)
+  {
+    if constexpr (no_linting) {
+      return;
+    }
+    std::regex regex("=\\s*(\\w+)\\s*\\[[^\\]]*\\]\\s*\\(");
+    for (std::smatch match; std::regex_search(str, match, regex); str = match.suffix()) {
+      /* This only catches some invalid usage. For the rest, the CI will catch them. */
+      const char *msg =
+          "Array constructor is not cross API compatible. Use type_array instead of type[].";
+      report_error(str, match, msg);
+    }
+  }
+
+  std::string suffix()
+  {
+    if (shared_vars_.empty()) {
+      return "";
+    }
+
+    std::stringstream suffix;
+    /**
+     * For Metal shaders to compile, shared (threadgroup) variable cannot be declared globally.
+     * They must reside within a function scope. Hence, we need to extract these declarations and
+     * generate shared memory blocks within the entry point function. These shared memory blocks
+     * can then be passed as references to the remaining shader via the class function scope.
+     *
+     * The shared variable definitions from the source file are replaced with references to
+     * threadgroup memory blocks (using _shared_sta and _shared_end macros), but kept in-line in
+     * case external macros are used to declare the dimensions.
+     *
+     * Each part of the codegen is stored inside macros so that we don't have to do string
+     * replacement at runtime.
+     */
+    /* Arguments of the wrapper class constructor. */
+    suffix << "#undef MSL_SHARED_VARS_ARGS\n";
+    /* References assignment inside wrapper class constructor. */
+    suffix << "#undef MSL_SHARED_VARS_ASSIGN\n";
+    /* Declaration of threadgroup variables in entry point function. */
+    suffix << "#undef MSL_SHARED_VARS_DECLARE\n";
+    /* Arguments for wrapper class constructor call. */
+    suffix << "#undef MSL_SHARED_VARS_PASS\n";
+
+    /**
+     * Example replacement:
+     *
+     * `
+     * // Source
+     * shared float bar[10]; // Source declaration.
+     * shared float foo;     // Source declaration.
+     * // Rest of the source ...
+     * // End of Source
+     *
+     * // Backend Output
+     * class Wrapper { // Added at runtime by backend.
+     *
+     * threadgroup float (&foo);     // Replaced by regex and macros.
+     * threadgroup float (&bar)[10]; // Replaced by regex and macros.
+     * // Rest of the source ...
+     *
+     * Wrapper ( // Added at runtime by backend.
+     * threadgroup float (&_foo), threadgroup float (&_bar)[10] // MSL_SHARED_VARS_ARGS
+     * ) // Added at runtime by backend.
+     * : foo(_foo), bar(_bar) // MSL_SHARED_VARS_ASSIGN
+     * {} // Added at runtime by backend.
+     *
+     * }; // End of Wrapper // Added at runtime by backend.
+     *
+     * kernel entry_point() { // Added at runtime by backend.
+     *
+     * threadgroup float foo;    // MSL_SHARED_VARS_DECLARE
+     * threadgroup float bar[10] // MSL_SHARED_VARS_DECLARE
+     *
+     * Wrapper wrapper // Added at runtime by backend.
+     * (foo, bar) // MSL_SHARED_VARS_PASS
+     * ; // Added at runtime by backend.
+     *
+     * } // Added at runtime by backend.
+     * // End of Backend Output
+     * `
+     */
+    std::stringstream args, assign, declare, pass;
+
+    bool first = true;
+    for (SharedVar &var : shared_vars_) {
+      char sep = first ? ' ' : ',';
+      /* */
+      args << sep << "threadgroup " << var.type << "(&_" << var.name << ")" << var.array;
+      assign << (first ? ':' : ',') << var.name << "(_" << var.name << ")";
+      declare << "threadgroup " << var.type << ' ' << var.name << var.array << ";";
+      pass << sep << var.name;
+      first = false;
+    }
+
+    suffix << "#define MSL_SHARED_VARS_ARGS " << args.str() << "\n";
+    suffix << "#define MSL_SHARED_VARS_ASSIGN " << assign.str() << "\n";
+    suffix << "#define MSL_SHARED_VARS_DECLARE " << declare.str() << "\n";
+    suffix << "#define MSL_SHARED_VARS_PASS (" << pass.str() << ")\n";
+
+    return suffix.str();
+  }
+};
+
+template<typename T> class PreprocessorPython : public Preprocessor<T, true> {
+ public:
+  PreprocessorPython(T &error_cb) : Preprocessor<T, true>(error_cb){};
+};
+
+}  // namespace blender::gpu::shader
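For context, here is a minimal sketch of how the new Preprocessor class is driven, mirroring what the updated `main()` above does. The input lines and the error formatting are illustrative only; the real tool reads from a file and prints `file:line:char` diagnostics:

    #include <iostream>
    #include <regex>
    #include <string>

    #include "glsl_preprocess.hh"

    int main()
    {
      /* Error callback matching the (line, match, message) shape used by the linting passes. */
      auto report_error =
          [](const std::string &src_line, const std::smatch &match, const char *msg) {
            std::cerr << "example.glsl: error: " << msg << "\n"
                      << src_line << "\n"
                      << std::string(match.position(), ' ') << "^\n";
          };

      blender::gpu::shader::Preprocessor processor(report_error);

      /* Mutated to `//include "lib.glsl"` so later stages can spot dependencies. */
      processor << std::string("#include \"lib.glsl\"\n");
      /* Rewritten to `= ARRAY_T(float) ARRAY_V(0.0, 1.0)`; the lint also flags this syntax,
       * steering the GLSL codebase toward `float_array(...)` instead. */
      processor << std::string("float samples[2] = float[2](0.0, 1.0);\n");
      /* Recorded so that suffix() emits the MSL_SHARED_VARS_* macro block at the end. */
      processor << std::string("shared float cache[64];\n");

      std::cout << processor.str();
      return 0;
    }

Note that `PreprocessorPython` is the same class with `no_linting = true`, since the lints produce too many false positives on arbitrary Python-provided shader source.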
@@ -17,6 +17,8 @@
 #include "GPU_matrix.hh"
 #include "GPU_platform.hh"
 
+#include "glsl_preprocess/glsl_preprocess.hh"
+
 #include "gpu_backend.hh"
 #include "gpu_context_private.hh"
 #include "gpu_shader_create_info.hh"
@@ -299,6 +301,40 @@ GPUShader *GPU_shader_create_from_info(const GPUShaderCreateInfo *_info)
   return wrap(Context::get()->compiler->compile(info, false));
 }
 
+static std::string preprocess_source(StringRefNull original)
+{
+  auto no_err_report = [](std::string, std::smatch, const char *) {};
+  gpu::shader::PreprocessorPython processor(no_err_report);
+  processor << std::string(original);
+  return processor.str();
+};
+
+GPUShader *GPU_shader_create_from_info_python(const GPUShaderCreateInfo *_info)
+{
+  using namespace blender::gpu::shader;
+  ShaderCreateInfo &info = *const_cast<ShaderCreateInfo *>(
+      reinterpret_cast<const ShaderCreateInfo *>(_info));
+
+  std::string vertex_source_original = info.vertex_source_generated;
+  std::string fragment_source_original = info.fragment_source_generated;
+  std::string geometry_source_original = info.geometry_source_generated;
+  std::string compute_source_original = info.compute_source_generated;
+
+  info.vertex_source_generated = preprocess_source(info.vertex_source_generated);
+  info.fragment_source_generated = preprocess_source(info.fragment_source_generated);
+  info.geometry_source_generated = preprocess_source(info.geometry_source_generated);
+  info.compute_source_generated = preprocess_source(info.compute_source_generated);
+
+  GPUShader *result = wrap(Context::get()->compiler->compile(info, false));
+
+  info.vertex_source_generated = vertex_source_original;
+  info.fragment_source_generated = fragment_source_original;
+  info.geometry_source_generated = geometry_source_original;
+  info.compute_source_generated = compute_source_original;
+
+  return result;
+}
+
 GPUShader *GPU_shader_create_from_python(const char *vertcode,
                                          const char *fragcode,
                                          const char *geomcode,
@@ -315,6 +351,28 @@ GPUShader *GPU_shader_create_from_python(const char *vertcode,
     libcode = libcodecat = BLI_strdupcat(libcode, datatoc_gpu_shader_colorspace_lib_glsl);
   }
 
+  std::string vertex_source_processed;
+  std::string fragment_source_processed;
+  std::string geometry_source_processed;
+  std::string library_source_processed;
+
+  if (vertcode != nullptr) {
+    vertex_source_processed = preprocess_source(vertcode);
+    vertcode = vertex_source_processed.c_str();
+  }
+  if (fragcode != nullptr) {
+    fragment_source_processed = preprocess_source(fragcode);
+    fragcode = fragment_source_processed.c_str();
+  }
+  if (geomcode != nullptr) {
+    geometry_source_processed = preprocess_source(geomcode);
+    geomcode = geometry_source_processed.c_str();
+  }
+  if (libcode != nullptr) {
+    library_source_processed = preprocess_source(libcode);
+    libcode = library_source_processed.c_str();
+  }
+
   /* Use pyGPUShader as default name for shader. */
   const char *shname = name != nullptr ? name : "pyGPUShader";
 
@@ -484,7 +484,13 @@ struct GPUSource {
       int64_t keyword_cursor = 0;
       out_qualifier = keyword_parse(arg, keyword_cursor);
       out_type = keyword_parse(arg, keyword_cursor);
+      /* Skip qualifier prefix macro expanded by GLSL preprocessing (e.g. _out_sta). */
+      StringRef qualifier_prefix = keyword_parse(arg, keyword_cursor);
       out_name = keyword_parse(arg, keyword_cursor);
+
+      if (out_qualifier == "const") {
+        out_name = qualifier_prefix;
+      }
       if (out_name.is_empty()) {
         /* No qualifier case. */
         out_name = out_type;
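The extra parse step above is needed because, after preprocessing, a decorated argument such as `out float var[2]` reaches this parser as `out float _out_sta var _out_end[2]` (see the `argument_decorator_macro_injection` example in `glsl_preprocess.hh`), so an injected token now sits between the type and the name. `const` arguments are not matched by that regex and carry no injected prefix, which is why in that branch the third keyword is already the name and is restored from `qualifier_prefix`.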
@@ -1031,8 +1037,8 @@ struct GPUSource {
     GPUSource *dependency_source = nullptr;
 
     {
-      /* Include directive has been mangled on purpose. See `datatoc.cc`. */
-      pos = source.find("//nclude \"", pos + 1);
+      /* Include directive has been mangled on purpose. See `glsl_preprocess.hh`. */
+      pos = source.find("\n//include \"", pos + 1);
       if (pos == -1) {
         return 0;
       }
@@ -426,8 +426,6 @@ class MSLGeneratorInterface {
   blender::Vector<MSLVertexOutputAttribute> vertex_output_varyings_tf;
   /* Clip Distances. */
   blender::Vector<char> clip_distances;
-  /* Shared Memory Blocks. */
-  blender::Vector<MSLSharedMemoryBlock> shared_memory_blocks;
   /* Max bind IDs. */
   int max_tex_bind_index = 0;
   /** GL Global usage. */
@@ -182,26 +182,6 @@ static void remove_singleline_comments_func(std::string &str)
   }
 }
 
-static bool is_program_word(const char *chr, int *len)
-{
-  int numchars = 0;
-  for (const char *c = chr; *c != '\0'; c++) {
-    char ch = *c;
-    /* NOTE: Hash (`#`) is not valid in var names, but is used by Closure macro patterns. */
-    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
-        (numchars > 0 && ch >= '0' && ch <= '9') || ch == '_' || ch == '#')
-    {
-      numchars++;
-    }
-    else {
-      *len = numchars;
-      return (numchars > 0);
-    }
-  }
-  *len = numchars;
-  return true;
-}
-
 static int backwards_program_word_scan(const char *array_loc, const char *min)
 {
   const char *start;
@@ -267,130 +247,6 @@ static void extract_and_replace_clipping_distances(std::string &vertex_source,
|
||||
}
|
||||
}
|
||||
|
||||
/**
* Replace function parameter patterns containing:
* `out vec3 somevar` with `THD vec3&somevar`,
* which enables pass-by-reference via the resolved macro:
* `thread vec3& somevar`.
*/
static void replace_outvars(std::string &str)
{
char *current_str_begin = &*str.begin();
char *current_str_end = &*str.end();

for (char *c = current_str_begin + 2; c < current_str_end - 6; c++) {
char *start = strstr(c, "out ");
if (start == nullptr) {
return;
}
else {
c = start;
if (strncmp(c - 2, "in", 2) == 0) {
start = c - 2;
}

/* Check that the following are words. */
int len1, len2;
char *word_base1 = c + 4;
char *word_base2 = word_base1;

if (is_program_word(word_base1, &len1) && (*(word_base1 + len1) == ' ')) {
word_base2 = word_base1 + len1 + 1;
if (is_program_word(word_base2, &len2)) {
/* Match found. */
bool is_array = (*(word_base2 + len2) == '[');
if (is_array) {
/* Generate out-variable pattern for arrays, of form
* `OUT(vec2,samples,CRYPTOMATTE_LEVELS_MAX)`
* replacing original `out vec2 samples[SAMPLE_LEN]`
* using 'OUT' macro declared in `mtl_shader_defines.msl`. */
char *array_end = strchr(word_base2 + len2, ']');
if (array_end != nullptr) {
*start = 'O';
*(start + 1) = 'U';
*(start + 2) = 'T';
*(start + 3) = '(';
for (char *clear = start + 4; clear < c + 4; clear++) {
*clear = ' ';
}
*(word_base2 - 1) = ',';
*(word_base2 + len2) = ',';
*array_end = ')';
}
}
else {
/* Generate out-variable pattern of form `THD type&var` from original `out vec4 var`. */
*start = 'T';
*(start + 1) = 'H';
*(start + 2) = 'D';
for (char *clear = start + 3; clear < c + 4; clear++) {
*clear = ' ';
}
*(word_base2 - 1) = '&';
}
}
}
}
}
}

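To make the in-place rewrite concrete, a minimal sketch of inputs and outputs (identifiers hypothetical). Every replacement preserves the string length, which is why surrounding characters are overwritten with spaces rather than inserted:

/* "out vec4 color"        -> "THD vec4&color"
 * "inout vec3 normal"     -> "THD   vec3&normal"   (the "in" prefix is absorbed)
 * "out vec2 samples[LEN]" -> "OUT(vec2,samples,LEN)" */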
static void replace_matrix_constructors(std::string &str)
{
/* Replace matrix constructors with GLSL-compatible constructors for Metal.
* Base matrix constructors e.g. mat3x3 do not have as many overload variants as GLSL.
* To add compatibility, we declare custom constructors e.g. MAT3x3 in `mtl_shader_defines.msl`.
* If the GLSL syntax matches, we map mat3x3(..) -> MAT3x3(..) and implement a custom
* constructor. This supports both mat3(..) and mat3x3(..) style syntax. */
char *current_str_begin = &*str.begin();
char *current_str_end = &*str.end();

for (char *c = current_str_begin; c < current_str_end - 10; c++) {
char *base_scan = strstr(c, "mat");
if (base_scan == nullptr) {
break;
}
/* Track end of constructor. */
char *constructor_end = nullptr;

/* Check if the next character is a matrix dimension. */
c = base_scan + 3;
if (!(*c == '2' || *c == '3' || *c == '4')) {
/* Not a constructor, skip. */
continue;
}

/* Possibly a multi-dimensional matrix constructor. Verify if the next char is a dimension. */
c++;
if (*c == 'x') {
c++;
if (*c == '2' || *c == '3' || *c == '4') {
c++;
}
else {
/* Not a matrix constructor, continue. */
continue;
}
}

/* Check for the constructor's opening parenthesis. */
if (*c == '(') {
constructor_end = c;
}
else {
/* Not a matrix constructor, continue. */
continue;
}

/* If it is a constructor, replace with MATN(..) syntax. */
if (constructor_end != nullptr) {
ARRAY_SET_ITEMS(base_scan, 'M', 'A', 'T');
continue;
}
}
}

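A sketch of what the in-place constructor rewrite above produces (arguments hypothetical):

/* mat3(m)      -> MAT3(m)
 * mat4x4(a, b) -> MAT4x4(a, b)
 * "mat3 m;"    -> unchanged (no '(' after the dimension, so not a constructor). */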
static void replace_array_initializers_func(std::string &str)
{
char *current_str_begin = &*str.begin();
@@ -572,135 +428,6 @@ static bool extract_ssbo_pragma_info(const MTLShader *shader,
return false;
}

/* Extract shared memory declarations and their parameters.
* Inserts extracted cases as entries in MSLGeneratorInterface's shared memory block
* list. These will later be used to generate shared memory declarations within the entry point.
*
* TODO(Metal/GPU): Move shared memory declarations to GPUShaderCreateInfo. This is currently a
* necessary workaround to match GLSL functionality and enable full compute shader support. In the
* long term, best to avoid needing to perform this operation. */
void extract_shared_memory_blocks(MSLGeneratorInterface &msl_iface,
std::string &glsl_compute_source)
{
msl_iface.shared_memory_blocks.clear();
char *current_str_begin = &*glsl_compute_source.begin();
char *current_str_end = &*glsl_compute_source.end();

for (char *c = current_str_begin; c < current_str_end - 6; c++) {
/* Find first instance of "shared ". */
char *c_expr_start = strstr(c, "shared ");
if (c_expr_start == nullptr) {
break;
}
/* Check if "shared" was part of a previous word. If so, this is not valid. */
if (next_word_in_range(c_expr_start - 1, c_expr_start) != nullptr) {
c += 7; /* Jump forward by length of "shared ". */
continue;
}

/* Jump to shared declaration and detect end of statement. */
c = c_expr_start;
char *c_expr_end = strstr(c, ";");
if (c_expr_end == nullptr) {
break;
}

/* Prepare MSLSharedMemoryBlock instance. */
MSLSharedMemoryBlock new_shared_block;
char buf[256];

/* Read type-name. */
c += 7; /* Jump forward by length of "shared ". */
c = next_word_in_range(c, c_expr_end);
if (c == nullptr) {
c = c_expr_end + 1;
continue;
}

char *c_next_space = next_symbol_in_range(c, c_expr_end, ' ');
if (c_next_space == nullptr) {
c = c_expr_end + 1;
continue;
}
int len = c_next_space - c;
BLI_assert(len < 256);
BLI_strncpy(buf, c, len + 1);
new_shared_block.type_name = std::string(buf);

/* Read var-name.
* `varname` can either come right before the final semi-colon, or
* with following array syntax.
* Spaces may exist before the closing symbol. */
c = c_next_space + 1;
c = next_word_in_range(c, c_expr_end);
if (c == nullptr) {
c = c_expr_end + 1;
continue;
}

char *c_array_begin = next_symbol_in_range(c, c_expr_end, '[');
c_next_space = next_symbol_in_range(c, c_expr_end, ' ');

char *varname_end = nullptr;
if (c_array_begin != nullptr) {
/* Array path. */
if (c_next_space != nullptr) {
varname_end = (c_next_space < c_array_begin) ? c_next_space : c_array_begin;
}
else {
varname_end = c_array_begin;
}
new_shared_block.is_array = true;
}
else {
/* Ending semi-colon. */
if (c_next_space != nullptr) {
varname_end = (c_next_space < c_expr_end) ? c_next_space : c_expr_end;
}
else {
varname_end = c_expr_end;
}
new_shared_block.is_array = false;
}
len = varname_end - c;
BLI_assert(len < 256);
BLI_strncpy(buf, c, len + 1);
new_shared_block.varname = std::string(buf);

/* Determine if array. */
if (new_shared_block.is_array) {
int len = c_expr_end - c_array_begin;
BLI_strncpy(buf, c_array_begin, len + 1);
new_shared_block.array_decl = std::string(buf);
}

/* Shared block is valid, add it to the list and replace the declaration with a class member
* reference. This declaration needs to have one of the formats:
* TG int& varname;
* TG int (&varname)[len][len]
*
* In order to fit in the same space, replace `threadgroup` with the `TG` macro.
*/
for (char *c = c_expr_start; c <= c_expr_end; c++) {
*c = ' ';
}
std::string out_str = "TG ";
out_str += new_shared_block.type_name;
out_str += (new_shared_block.is_array) ? "(&" : "&";
out_str += new_shared_block.varname;
if (new_shared_block.is_array) {
out_str += ")" + new_shared_block.array_decl;
}
out_str += ";;";
memcpy(c_expr_start, out_str.c_str(), (out_str.length() - 1) * sizeof(char));

/* Jump to end of statement. */
c = c_expr_end + 1;

msl_iface.shared_memory_blocks.append(new_shared_block);
}
}

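A worked sketch of the extraction for a hypothetical declaration:

/* Input:   shared float cache[64];
 * Parsed:  type_name = "float", varname = "cache", array_decl = "[64]", is_array = true
 * Rewrite: the statement is blanked to spaces, then overwritten in place with
 *          TG float(&cache)[64];
 * which resolves to `threadgroup float (&cache)[64];` once the TG macro expands. */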
/** \} */

/* -------------------------------------------------------------------- */
@@ -973,11 +700,9 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
}

/* Special condition - mat3 and array constructor replacement. */
replace_matrix_constructors(shd_builder_->glsl_vertex_source_);
replace_array_initializers_func(shd_builder_->glsl_vertex_source_);

if (!msl_iface.uses_transform_feedback) {
replace_matrix_constructors(shd_builder_->glsl_fragment_source_);
replace_array_initializers_func(shd_builder_->glsl_fragment_source_);
}

@@ -1058,12 +783,6 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
/* Extract gl_ClipDistances. */
extract_and_replace_clipping_distances(shd_builder_->glsl_vertex_source_, msl_iface);

/* Replace 'out' attribute on function parameters with pass-by-reference. */
replace_outvars(shd_builder_->glsl_vertex_source_);
if (!msl_iface.uses_transform_feedback) {
replace_outvars(shd_builder_->glsl_fragment_source_);
}

/**** METAL Shader source generation. ****/
/* Setup `stringstream` for populating generated MSL shader vertex/frag shaders. */
std::stringstream ss_vertex;
@@ -1252,7 +971,7 @@ bool MTLShader::generate_msl_from_glsl(const shader::ShaderCreateInfo *info)
/* Add Texture members.
* These members pack both a texture and a sampler into a single
* struct, as both are needed within texture functions.
* e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
* e.g. `_mtl_sampler_2d<float, access::read>`
* The exact typename is generated inside `get_msl_typestring_wrapper()`. */
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::VERTEX)) {
@@ -1485,7 +1204,6 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
BLI_assert(shd_builder_->glsl_compute_source_.size() > 0);

/*** Source cleanup. ***/
replace_matrix_constructors(shd_builder_->glsl_compute_source_);
replace_array_initializers_func(shd_builder_->glsl_compute_source_);

/**** Extract usage of GL globals. ****/
@@ -1526,19 +1244,6 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
remove_multiline_comments_func(shd_builder_->glsl_compute_source_);
remove_singleline_comments_func(shd_builder_->glsl_compute_source_);

/** Extract usage of shared memory.
* For Metal shaders to compile, shared (threadgroup) memory cannot be declared globally.
* It must reside within a function scope. Hence, we need to extract these uses and generate
* shared memory blocks within the entry point function, which can then be passed as references
* to the remaining shader via the class function scope.
*
* The existing block definitions are then replaced with references to threadgroup memory blocks,
* but kept in-line in case external macros are used to declare the dimensions. */
extract_shared_memory_blocks(msl_iface, shd_builder_->glsl_compute_source_);

/* Replace 'out' attribute on function parameters with pass-by-reference. */
replace_outvars(shd_builder_->glsl_compute_source_);

/** Generate Compute shader stage. **/
std::stringstream ss_compute;
ss_compute << "#line 1 \"msl_wrapper_code\"\n";
@@ -1599,7 +1304,7 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
/* Add Texture members.
* These members pack both a texture and a sampler into a single
* struct, as both are needed within texture functions.
* e.g. `_mtl_combined_image_sampler_2d<float, access::read>`
* e.g. `_mtl_sampler_2d<float, access::read>`
* The exact typename is generated inside `get_msl_typestring_wrapper()`. */
for (const MSLTextureResource &tex : msl_iface.texture_samplers) {
if (bool(tex.stage & ShaderStage::COMPUTE)) {
@@ -1631,37 +1336,8 @@ bool MTLShader::generate_msl_from_glsl_compute(const shader::ShaderCreateInfo *i
/* Compute constructor for Shared memory blocks, as we must pass
* local references from entry-point function scope into the class
* instantiation. */
ss_compute << get_stage_class_name(ShaderStage::COMPUTE) << "(";
bool first = true;
if (msl_iface.shared_memory_blocks.size() > 0) {
for (const MSLSharedMemoryBlock &block : msl_iface.shared_memory_blocks) {
if (!first) {
ss_compute << ",";
}
if (block.is_array) {
ss_compute << "TG " << block.type_name << " (&_" << block.varname << ")"
<< block.array_decl;
}
else {
ss_compute << "TG " << block.type_name << " &_" << block.varname;
}
ss_compute << std::endl;
first = false;
}
ss_compute << ") : ";
first = true;
for (const MSLSharedMemoryBlock &block : msl_iface.shared_memory_blocks) {
if (!first) {
ss_compute << ",";
}
ss_compute << block.varname << "(_" << block.varname << ")";
first = false;
}
}
else {
ss_compute << ") ";
}
ss_compute << "{ }" << std::endl;
ss_compute << get_stage_class_name(ShaderStage::COMPUTE)
<< "(MSL_SHARED_VARS_ARGS) MSL_SHARED_VARS_ASSIGN {}\n";

/* Class Closing Bracket to end shader global scope. */
ss_compute << "};" << std::endl;
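For the same hypothetical `cache` block, the removed inline generation above emitted a constructor of roughly the following shape (class name hypothetical); the MSL_SHARED_VARS_ARGS / MSL_SHARED_VARS_ASSIGN macros presumably expand to the equivalent parameter and initializer lists:

/* ComputeStageClass(TG float (&_cache)[64]) : cache(_cache) {} */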
@@ -2440,28 +2116,9 @@ std::string MSLGeneratorInterface::generate_msl_compute_entry_stub()

out << this->generate_msl_compute_inputs_string();
out << ") {" << std::endl << std::endl;
/* Generate Compute shader instance constructor. If shared memory blocks are used,
* these must be declared and then passed into the constructor. */
std::string stage_instance_constructor = "";
bool first = true;
if (shared_memory_blocks.size() > 0) {
stage_instance_constructor += "(";
for (const MSLSharedMemoryBlock &block : shared_memory_blocks) {
if (block.is_array) {
out << "TG " << block.type_name << " " << block.varname << block.array_decl << ";";
}
else {
out << "TG " << block.type_name << " " << block.varname << ";";
}
stage_instance_constructor += ((!first) ? "," : "") + block.varname;
first = false;

out << std::endl;
}
stage_instance_constructor += ")";
}
out << "MSL_SHARED_VARS_DECLARE\n";
out << "\t" << get_stage_class_name(ShaderStage::COMPUTE) << " " << shader_stage_inst_name
<< stage_instance_constructor << ";" << std::endl;
<< " MSL_SHARED_VARS_PASS;\n";

/* Copy global variables. */
/* Entry point parameters for gl Globals. */
@@ -3584,9 +3241,10 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_
if (tex_buf_id != -1) {
MSLBufferBlock &ssbo = this->storage_blocks[tex_buf_id];
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
<< this->texture_samplers[i].name << ".buffer = " << ssbo.name << ";" << std::endl;
<< this->texture_samplers[i].name << ".atomic.buffer = " << ssbo.name << ";"
<< std::endl;
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
<< this->texture_samplers[i].name << ".aligned_width = uniforms->"
<< this->texture_samplers[i].name << ".atomic.aligned_width = uniforms->"
<< this->texture_samplers[i].name << "_metadata.w;" << std::endl;

/* Buffer-backed 2D Array and 3D texture types are not natively supported so texture size
@@ -3598,7 +3256,7 @@ std::string MSLGeneratorInterface::generate_msl_texture_vars(ShaderStage shader_
ImageType::INT_3D_ATOMIC))
{
out << "\t" << get_shader_stage_instance_name(shader_stage) << "."
<< this->texture_samplers[i].name << ".texture_size = ushort3(uniforms->"
<< this->texture_samplers[i].name << ".atomic.texture_size = ushort3(uniforms->"
<< this->texture_samplers[i].name << "_metadata.xyz);" << std::endl;
}
}
@@ -4022,128 +3680,128 @@ std::string MSLTextureResource::get_msl_wrapper_type_str() const
/* Add Types as needed. */
switch (this->type) {
case ImageType::FLOAT_1D: {
return "_mtl_combined_image_sampler_1d";
return "_mtl_sampler_1d";
}
case ImageType::FLOAT_2D: {
return "_mtl_combined_image_sampler_2d";
return "_mtl_sampler_2d";
}
case ImageType::FLOAT_3D: {
return "_mtl_combined_image_sampler_3d";
return "_mtl_sampler_3d";
}
case ImageType::FLOAT_CUBE: {
return "_mtl_combined_image_sampler_cube";
return "_mtl_sampler_cube";
}
case ImageType::FLOAT_1D_ARRAY: {
return "_mtl_combined_image_sampler_1d_array";
return "_mtl_sampler_1d_array";
}
case ImageType::FLOAT_2D_ARRAY: {
return "_mtl_combined_image_sampler_2d_array";
return "_mtl_sampler_2d_array";
}
case ImageType::FLOAT_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_cube_array";
return "_mtl_sampler_cube_array";
}
case ImageType::FLOAT_BUFFER: {
return "_mtl_combined_image_sampler_buffer";
return "_mtl_sampler_buffer";
}
case ImageType::DEPTH_2D: {
return "_mtl_combined_image_sampler_depth_2d";
return "_mtl_sampler_depth_2d";
}
case ImageType::SHADOW_2D: {
return "_mtl_combined_image_sampler_depth_2d";
return "_mtl_sampler_depth_2d";
}
case ImageType::DEPTH_2D_ARRAY: {
return "_mtl_combined_image_sampler_depth_2d_array";
return "_mtl_sampler_depth_2d_array";
}
case ImageType::SHADOW_2D_ARRAY: {
return "_mtl_combined_image_sampler_depth_2d_array";
return "_mtl_sampler_depth_2d_array";
}
case ImageType::DEPTH_CUBE: {
return "_mtl_combined_image_sampler_depth_cube";
return "_mtl_sampler_depth_cube";
}
case ImageType::SHADOW_CUBE: {
return "_mtl_combined_image_sampler_depth_cube";
return "_mtl_sampler_depth_cube";
}
case ImageType::DEPTH_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_depth_cube_array";
return "_mtl_sampler_depth_cube_array";
}
case ImageType::SHADOW_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_depth_cube_array";
return "_mtl_sampler_depth_cube_array";
}
case ImageType::INT_1D: {
return "_mtl_combined_image_sampler_1d";
return "_mtl_sampler_1d";
}
case ImageType::INT_2D: {
return "_mtl_combined_image_sampler_2d";
return "_mtl_sampler_2d";
}
case ImageType::INT_3D: {
return "_mtl_combined_image_sampler_3d";
return "_mtl_sampler_3d";
}
case ImageType::INT_CUBE: {
return "_mtl_combined_image_sampler_cube";
return "_mtl_sampler_cube";
}
case ImageType::INT_1D_ARRAY: {
return "_mtl_combined_image_sampler_1d_array";
return "_mtl_sampler_1d_array";
}
case ImageType::INT_2D_ARRAY: {
return "_mtl_combined_image_sampler_2d_array";
return "_mtl_sampler_2d_array";
}
case ImageType::INT_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_cube_array";
return "_mtl_sampler_cube_array";
}
case ImageType::INT_BUFFER: {
return "_mtl_combined_image_sampler_buffer";
return "_mtl_sampler_buffer";
}
case ImageType::UINT_1D: {
return "_mtl_combined_image_sampler_1d";
return "_mtl_sampler_1d";
}
case ImageType::UINT_2D: {
return "_mtl_combined_image_sampler_2d";
return "_mtl_sampler_2d";
}
case ImageType::UINT_3D: {
return "_mtl_combined_image_sampler_3d";
return "_mtl_sampler_3d";
}
case ImageType::UINT_CUBE: {
return "_mtl_combined_image_sampler_cube";
return "_mtl_sampler_cube";
}
case ImageType::UINT_1D_ARRAY: {
return "_mtl_combined_image_sampler_1d_array";
return "_mtl_sampler_1d_array";
}
case ImageType::UINT_2D_ARRAY: {
return "_mtl_combined_image_sampler_2d_array";
return "_mtl_sampler_2d_array";
}
case ImageType::UINT_CUBE_ARRAY: {
return "_mtl_combined_image_sampler_cube_array";
return "_mtl_sampler_cube_array";
}
case ImageType::UINT_BUFFER: {
return "_mtl_combined_image_sampler_buffer";
return "_mtl_sampler_buffer";
}
/* If native texture atomics are unsupported, map types to fallback atomic structures which
* contain a buffer pointer and metadata members for size and alignment. */
case ImageType::INT_2D_ATOMIC:
case ImageType::UINT_2D_ATOMIC: {
if (supports_native_atomics) {
return "_mtl_combined_image_sampler_2d";
return "_mtl_sampler_2d";
}
else {
return "_mtl_combined_image_sampler_2d_atomic_fallback";
return "_mtl_sampler_2d_atomic";
}
}
case ImageType::INT_3D_ATOMIC:
case ImageType::UINT_3D_ATOMIC: {
if (supports_native_atomics) {
return "_mtl_combined_image_sampler_3d";
return "_mtl_sampler_3d";
}
else {
return "_mtl_combined_image_sampler_3d_atomic_fallback";
return "_mtl_sampler_3d_atomic";
}
}
case ImageType::INT_2D_ARRAY_ATOMIC:
case ImageType::UINT_2D_ARRAY_ATOMIC: {
if (supports_native_atomics) {
return "_mtl_combined_image_sampler_2d_array";
return "_mtl_sampler_2d_array";
}
else {
return "_mtl_combined_image_sampler_2d_array_atomic_fallback";
return "_mtl_sampler_2d_array_atomic";
}
}
default: {

@@ -427,24 +427,6 @@ static void detect_workarounds()
{
GLContext::unused_fb_slot_workaround = true;
}
/* dFdx/dFdy calculation factors, those are dependent on driver. */
if (GPU_type_matches(GPU_DEVICE_ATI, GPU_OS_ANY, GPU_DRIVER_ANY) && strstr(version, "3.3.10750"))
{
GLContext::derivative_signs[0] = 1.0;
GLContext::derivative_signs[1] = -1.0;
}
else if (GPU_type_matches(GPU_DEVICE_INTEL, GPU_OS_WIN, GPU_DRIVER_ANY)) {
if (strstr(version, "4.0.0 - Build 10.18.10.3308") ||
strstr(version, "4.0.0 - Build 9.18.10.3186") ||
strstr(version, "4.0.0 - Build 9.18.10.3165") ||
strstr(version, "3.1.0 - Build 9.17.10.3347") ||
strstr(version, "3.1.0 - Build 9.17.10.4101") ||
strstr(version, "3.3.0 - Build 8.15.10.2618"))
{
GLContext::derivative_signs[0] = -1.0;
GLContext::derivative_signs[1] = 1.0;
}
}

/* Draw shader parameters are broken on Qualcomm Windows ARM64 devices
* on Mesa version < 24.0.0 */
@@ -560,7 +542,6 @@ bool GLContext::texture_filter_anisotropic_support = false;
bool GLContext::debug_layer_workaround = false;
bool GLContext::unused_fb_slot_workaround = false;
bool GLContext::generate_mipmap_workaround = false;
float GLContext::derivative_signs[2] = {1.0f, 1.0f};

void GLBackend::capabilities_init()
{

@@ -67,7 +67,6 @@ class GLContext : public Context {
static bool debug_layer_workaround;
static bool unused_fb_slot_workaround;
static bool generate_mipmap_workaround;
static float derivative_signs[2];

/** VBO for missing vertex attribute binding. Avoid undefined behavior on some implementations. */
GLuint default_attr_vbo_;

@@ -1059,10 +1059,6 @@ static const char *glsl_patch_default_get()
/* Array compatibility. */
ss << "#define gpu_Array(_type) _type[]\n";

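A small usage sketch for the portability macro above:

/* gpu_Array(float)(1.0, 2.0, 3.0) expands to the native GLSL constructor
 *   float[](1.0, 2.0, 3.0)
 * while other backends can redefine gpu_Array to a C++-compatible helper. */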
/* Derivative sign can change depending on implementation. */
ss << "#define DFDX_SIGN " << std::setprecision(2) << GLContext::derivative_signs[0] << "\n";
ss << "#define DFDY_SIGN " << std::setprecision(2) << GLContext::derivative_signs[1] << "\n";

/* GLSL Backend Lib. */
ss << datatoc_glsl_shader_defines_glsl;

@@ -803,7 +803,7 @@ mat3x3 rotate(mat3x3 mat, EulerXYZ rotation)

mat4x4 rotate(mat4x4 mat, AxisAngle rotation)
{
mat4x4 result = mat4x4(rotate(mat3x3(mat), rotation));
mat4x4 result = to_float4x4(rotate(to_float3x3(mat), rotation));
result[0][3] = mat[0][3];
result[1][3] = mat[1][3];
result[2][3] = mat[2][3];
@@ -815,7 +815,7 @@ mat4x4 rotate(mat4x4 mat, AxisAngle rotation)
}
mat4x4 rotate(mat4x4 mat, EulerXYZ rotation)
{
mat4x4 result = mat4x4(rotate(mat3x3(mat), rotation));
mat4x4 result = to_float4x4(rotate(to_float3x3(mat), rotation));
result[0][3] = mat[0][3];
result[1][3] = mat[1][3];
result[2][3] = mat[2][3];
@@ -992,38 +992,38 @@ mat3x3 from_rot_scale(AxisAngle rotation, vec3 scale)

mat4x4 from_loc_rot(vec3 location, EulerXYZ rotation)
{
mat4x4 ret = mat4x4(from_rotation(rotation));
mat4x4 ret = to_float4x4(from_rotation(rotation));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot(vec3 location, Quaternion rotation)
{
mat4x4 ret = mat4x4(from_rotation(rotation));
mat4x4 ret = to_float4x4(from_rotation(rotation));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot(vec3 location, AxisAngle rotation)
{
mat4x4 ret = mat4x4(from_rotation(rotation));
mat4x4 ret = to_float4x4(from_rotation(rotation));
ret[3].xyz = location;
return ret;
}

mat4x4 from_loc_rot_scale(vec3 location, EulerXYZ rotation, vec3 scale)
{
mat4x4 ret = mat4x4(from_rot_scale(rotation, scale));
mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot_scale(vec3 location, Quaternion rotation, vec3 scale)
{
mat4x4 ret = mat4x4(from_rot_scale(rotation, scale));
mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale));
ret[3].xyz = location;
return ret;
}
mat4x4 from_loc_rot_scale(vec3 location, AxisAngle rotation, vec3 scale)
{
mat4x4 ret = mat4x4(from_rot_scale(rotation, scale));
mat4x4 ret = to_float4x4(from_rot_scale(rotation, scale));
ret[3].xyz = location;
return ret;
}
@@ -1079,11 +1079,11 @@ EulerXYZ to_euler(mat3x3 mat, const bool normalized)
}
EulerXYZ to_euler(mat4x4 mat)
{
return to_euler(mat3(mat));
return to_euler(to_float3x3(mat));
}
EulerXYZ to_euler(mat4x4 mat, const bool normalized)
{
return to_euler(mat3(mat), normalized);
return to_euler(to_float3x3(mat), normalized);
}

Quaternion normalized_to_quat_fast(mat3 mat)
@@ -1193,11 +1193,11 @@ Quaternion to_quaternion(mat3x3 mat, const bool normalized)
}
Quaternion to_quaternion(mat4x4 mat)
{
return to_quaternion(mat3(mat));
return to_quaternion(to_float3x3(mat));
}
Quaternion to_quaternion(mat4x4 mat, const bool normalized)
{
return to_quaternion(mat3(mat), normalized);
return to_quaternion(to_float3x3(mat), normalized);
}

vec3 to_scale(mat3x3 mat)
@@ -1216,11 +1216,11 @@ vec3 to_scale(mat3x3 mat, const bool allow_negative_scale)
}
vec3 to_scale(mat4x4 mat)
{
return to_scale(mat3(mat));
return to_scale(to_float3x3(mat));
}
vec3 to_scale(mat4x4 mat, const bool allow_negative_scale)
{
return to_scale(mat3(mat), allow_negative_scale);
return to_scale(to_float3x3(mat), allow_negative_scale);
}

void to_rot_scale(mat3x3 mat, out EulerXYZ r_rotation, out vec3 r_scale)
@@ -1265,7 +1265,7 @@ void to_rot_scale(mat3x3 mat,
void to_loc_rot_scale(mat4x4 mat, out vec3 r_location, out EulerXYZ r_rotation, out vec3 r_scale)
{
r_location = mat[3].xyz;
to_rot_scale(mat3(mat), r_rotation, r_scale);
to_rot_scale(to_float3x3(mat), r_rotation, r_scale);
}
void to_loc_rot_scale(mat4x4 mat,
out vec3 r_location,
@@ -1274,12 +1274,12 @@ void to_loc_rot_scale(mat4x4 mat,
const bool allow_negative_scale)
{
r_location = mat[3].xyz;
to_rot_scale(mat3(mat), r_rotation, r_scale, allow_negative_scale);
to_rot_scale(to_float3x3(mat), r_rotation, r_scale, allow_negative_scale);
}
void to_loc_rot_scale(mat4x4 mat, out vec3 r_location, out Quaternion r_rotation, out vec3 r_scale)
{
r_location = mat[3].xyz;
to_rot_scale(mat3(mat), r_rotation, r_scale);
to_rot_scale(to_float3x3(mat), r_rotation, r_scale);
}
void to_loc_rot_scale(mat4x4 mat,
out vec3 r_location,
@@ -1288,7 +1288,7 @@ void to_loc_rot_scale(mat4x4 mat,
const bool allow_negative_scale)
{
r_location = mat[3].xyz;
to_rot_scale(mat3(mat), r_rotation, r_scale, allow_negative_scale);
to_rot_scale(to_float3x3(mat), r_rotation, r_scale, allow_negative_scale);
}

vec3 transform_point(mat3x3 mat, vec3 point)
@@ -1308,7 +1308,7 @@ vec3 transform_direction(mat3x3 mat, vec3 direction)

vec3 transform_direction(mat4x4 mat, vec3 direction)
{
return mat3x3(mat) * direction;
return to_float3x3(mat) * direction;
}

vec2 project_point(mat3x3 mat, vec2 point)
@@ -1423,7 +1423,7 @@ bool is_negative(mat3x3 mat)
}
bool is_negative(mat4x4 mat)
{
return is_negative(mat3x3(mat));
return is_negative(to_float3x3(mat));
}

bool is_equal(mat2x2 a, mat2x2 b, float epsilon)
@@ -1505,15 +1505,15 @@ bool is_uniformly_scaled(mat3x3 mat)

bool is_orthogonal(mat4x4 mat)
{
return is_orthogonal(mat3x3(mat));
return is_orthogonal(to_float3x3(mat));
}
bool is_orthonormal(mat4x4 mat)
{
return is_orthonormal(mat3x3(mat));
return is_orthonormal(to_float3x3(mat));
}
bool is_uniformly_scaled(mat4x4 mat)
{
return is_uniformly_scaled(mat3x3(mat));
return is_uniformly_scaled(to_float3x3(mat));
}

/* Returns true if each individual column is unit scaled. Mainly for assert usage. */

@@ -85,12 +85,15 @@ vec2 do_tria()
float size = (tria2) ? -tria2Size : tria1Size;
vec2 center = (tria2) ? tria2Center : tria1Center;

vec2 arrow_pos[4] = vec2[4](vec2(0.0, 0.6), vec2(0.6, 0.0), vec2(-0.6, 0.0), vec2(0.0, -0.6));
vec2 arrow_pos[4] = float2_array(
vec2(0.0, 0.6), vec2(0.6, 0.0), vec2(-0.6, 0.0), vec2(0.0, -0.6));
/* Rotated uv space by 45deg and mirrored. */
vec2 arrow_uvs[4] = vec2[4](vec2(0.0, 0.85), vec2(0.85, 0.85), vec2(0.0, 0.0), vec2(0.0, 0.85));
vec2 arrow_uvs[4] = float2_array(
vec2(0.0, 0.85), vec2(0.85, 0.85), vec2(0.0, 0.0), vec2(0.0, 0.85));

vec2 point_pos[4] = vec2[4](vec2(-1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, -1.0), vec2(1.0, 1.0));
vec2 point_uvs[4] = vec2[4](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 0.0), vec2(1.0, 1.0));
vec2 point_pos[4] = float2_array(
vec2(-1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, -1.0), vec2(1.0, 1.0));
vec2 point_uvs[4] = float2_array(vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 0.0), vec2(1.0, 1.0));

/* We reuse the SDF round-box rendering of widget to render the tria shapes.
* This means we do clever tricks to position the rectangle the way we want using

@@ -30,44 +30,44 @@ void main()
{
/* NOTE(Metal): Declaring constant array in function scope to avoid increasing local shader
* memory pressure. */
const vec2 cornervec[36] = vec2[36](vec2(0.0, 1.0),
vec2(0.02, 0.805),
vec2(0.067, 0.617),
vec2(0.169, 0.45),
vec2(0.293, 0.293),
vec2(0.45, 0.169),
vec2(0.617, 0.076),
vec2(0.805, 0.02),
vec2(1.0, 0.0),
vec2(-1.0, 0.0),
vec2(-0.805, 0.02),
vec2(-0.617, 0.067),
vec2(-0.45, 0.169),
vec2(-0.293, 0.293),
vec2(-0.169, 0.45),
vec2(-0.076, 0.617),
vec2(-0.02, 0.805),
vec2(0.0, 1.0),
vec2(0.0, -1.0),
vec2(-0.02, -0.805),
vec2(-0.067, -0.617),
vec2(-0.169, -0.45),
vec2(-0.293, -0.293),
vec2(-0.45, -0.169),
vec2(-0.617, -0.076),
vec2(-0.805, -0.02),
vec2(-1.0, 0.0),
vec2(1.0, 0.0),
vec2(0.805, -0.02),
vec2(0.617, -0.067),
vec2(0.45, -0.169),
vec2(0.293, -0.293),
vec2(0.169, -0.45),
vec2(0.076, -0.617),
vec2(0.02, -0.805),
vec2(0.0, -1.0));
const vec2 cornervec[36] = float2_array(vec2(0.0, 1.0),
vec2(0.02, 0.805),
vec2(0.067, 0.617),
vec2(0.169, 0.45),
vec2(0.293, 0.293),
vec2(0.45, 0.169),
vec2(0.617, 0.076),
vec2(0.805, 0.02),
vec2(1.0, 0.0),
vec2(-1.0, 0.0),
vec2(-0.805, 0.02),
vec2(-0.617, 0.067),
vec2(-0.45, 0.169),
vec2(-0.293, 0.293),
vec2(-0.169, 0.45),
vec2(-0.076, 0.617),
vec2(-0.02, 0.805),
vec2(0.0, 1.0),
vec2(0.0, -1.0),
vec2(-0.02, -0.805),
vec2(-0.067, -0.617),
vec2(-0.169, -0.45),
vec2(-0.293, -0.293),
vec2(-0.45, -0.169),
vec2(-0.617, -0.076),
vec2(-0.805, -0.02),
vec2(-1.0, 0.0),
vec2(1.0, 0.0),
vec2(0.805, -0.02),
vec2(0.617, -0.067),
vec2(0.45, -0.169),
vec2(0.293, -0.293),
vec2(0.169, -0.45),
vec2(0.076, -0.617),
vec2(0.02, -0.805),
vec2(0.0, -1.0));

const vec2 center_offset[4] = vec2[4](
const vec2 center_offset[4] = float2_array(
vec2(1.0, 1.0), vec2(-1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0));

uint cflag = vflag & CNR_FLAG_RANGE;

@@ -338,8 +338,8 @@ vec3 dF_impl(vec3 v)

void dF_branch(float fn, out vec2 result)
{
result.x = DFDX_SIGN * dFdx(fn);
result.y = DFDY_SIGN * dFdy(fn);
result.x = dFdx(fn);
result.y = dFdy(fn);
}

#else
@@ -349,10 +349,10 @@ int g_derivative_flag = 0;
vec3 dF_impl(vec3 v)
{
if (g_derivative_flag > 0) {
return DFDX_SIGN * dFdx(v);
return dFdx(v);
}
else if (g_derivative_flag < 0) {
return DFDY_SIGN * dFdy(v);
return dFdy(v);
}
return vec3(0.0);
}

@@ -133,7 +133,7 @@ void main()
/* 3x3 blur */

/* clang-format off */
const float weights3x3[16] = float[16](
const float weights3x3[16] = float_array(
1.0, 2.0, 1.0, 0.0,
2.0, 4.0, 2.0, 0.0,
1.0, 2.0, 1.0, 0.0,
@@ -169,7 +169,7 @@ void main()
/* 5x5 blur */

/* clang-format off */
const float weights5x5[36] = float[36](
const float weights5x5[36] = float_array(
1.0, 2.0, 2.0, 2.0, 1.0, 0.0,
2.0, 5.0, 6.0, 5.0, 2.0, 0.0,
2.0, 6.0, 8.0, 6.0, 2.0, 0.0,

@@ -7,45 +7,45 @@
void normal_transform_object_to_world(vec3 vin, out vec3 vout)
{
/* Expansion of NormalMatrix. */
vout = vin * mat3(ModelMatrixInverse);
vout = vin * to_float3x3(ModelMatrixInverse);
}

void normal_transform_world_to_object(vec3 vin, out vec3 vout)
{
/* Expansion of NormalMatrixInverse. */
vout = vin * mat3(ModelMatrix);
vout = vin * to_float3x3(ModelMatrix);
}

void direction_transform_object_to_world(vec3 vin, out vec3 vout)
{
vout = mat3x3(ModelMatrix) * vin;
vout = to_float3x3(ModelMatrix) * vin;
}

void direction_transform_object_to_view(vec3 vin, out vec3 vout)
{
vout = mat3x3(ModelMatrix) * vin;
vout = mat3x3(ViewMatrix) * vout;
vout = to_float3x3(ModelMatrix) * vin;
vout = to_float3x3(ViewMatrix) * vout;
}

void direction_transform_view_to_world(vec3 vin, out vec3 vout)
{
vout = mat3x3(ViewMatrixInverse) * vin;
vout = to_float3x3(ViewMatrixInverse) * vin;
}

void direction_transform_view_to_object(vec3 vin, out vec3 vout)
{
vout = mat3x3(ViewMatrixInverse) * vin;
vout = mat3x3(ModelMatrixInverse) * vout;
vout = to_float3x3(ViewMatrixInverse) * vin;
vout = to_float3x3(ModelMatrixInverse) * vout;
}

void direction_transform_world_to_view(vec3 vin, out vec3 vout)
{
vout = mat3x3(ViewMatrix) * vin;
vout = to_float3x3(ViewMatrix) * vin;
}

void direction_transform_world_to_object(vec3 vin, out vec3 vout)
{
vout = mat3x3(ModelMatrixInverse) * vin;
vout = to_float3x3(ModelMatrixInverse) * vin;
}

void point_transform_object_to_world(vec3 vin, out vec3 vout)

@@ -13,13 +13,37 @@
#pragma clang diagnostic ignored "-Wunused-variable"
#pragma clang diagnostic ignored "-Wcomment"

#define ENABLE_IF(cond) thread metal::enable_if_t<(cond)> * = nullptr

/* Base instance with offsets. */
#define gpu_BaseInstance gl_BaseInstanceARB
#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)

/* Derivative signs. */
#define DFDX_SIGN 1.0
#define DFDY_SIGN 1.0
#ifdef MTL_WORKGROUP_SIZE_X
/* Older Metal compiler versions don't treat vector component access as constexpr.
* We have to make a wrapper class for that, otherwise we cannot use WorkGroupSize for sizing
* threadgroup arrays. Note that this bug is not present in version 4.1 of the compiler. */
struct mtl_WorkGroupSize {
union {
struct {
uint x, y, z;
};
uint2 xy;
uint3 xyz;
};

constexpr mtl_WorkGroupSize()
: x(MTL_WORKGROUP_SIZE_X), y(MTL_WORKGROUP_SIZE_Y), z(MTL_WORKGROUP_SIZE_Z)
{
}

constexpr inline operator uint3() const
{
return xyz;
}
};
# define gl_WorkGroupSize mtl_WorkGroupSize()
#endif
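A usage sketch of why this wrapper matters (the declaration below is hypothetical): because x, y and z are plain uint members initialized from the MTL_WORKGROUP_SIZE_* defines, an expression such as

/* threadgroup float cache[gl_WorkGroupSize.x]; */

has a compile-time constant size even on compiler versions where uint3 component access is not constexpr.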

/* Type definitions. */
/* int implicitly cast to bool in MSL. */
@@ -53,64 +77,9 @@ using bvec3 = bool3;
using bvec4 = bool4;

/* Compute decorators. */
#define TG threadgroup
#define barrier() \
threadgroup_barrier(mem_flags::mem_threadgroup | mem_flags::mem_device | mem_flags::mem_texture)

#ifdef MTL_USE_WORKGROUP_SIZE
/* Compute work-group size. */
struct constexp_uvec3 {
/* Type union to cover all syntax accessors:
* .x, .y, .z, .xy, .xyz
* Swizzle types invalid. */
union {
struct {
uint x, y, z;
};
struct {
uint2 xy;
};
uint3 xyz;
};

constexpr constexp_uvec3(uint _x, uint _y, uint _z) : x(_x), y(_y), z(_z) {}
constexpr uint operator[](int i)
{
/* Note: Need to switch on each elem value as array accessor triggers
* non-constant sizing error. This will be statically evaluated at compile time. */
switch (i) {
case 0:
return x;
case 1:
return y;
case 2:
return z;
default:
return 0;
}
}
constexpr inline operator uint3() const
{
return xyz;
}
constexpr inline operator uint2() const
{
return xy;
}
constexpr inline operator uint() const
{
return x;
}
};

constexpr constexp_uvec3 __internal_workgroupsize_get()
{
return constexp_uvec3(MTL_WORKGROUP_SIZE_X, MTL_WORKGROUP_SIZE_Y, MTL_WORKGROUP_SIZE_Z);
}

# define gl_WorkGroupSize __internal_workgroupsize_get()
#endif

/** Shader atomics:
* In order to emulate GLSL-style atomic operations, wherein variables can be used within atomic
* operations, even if they are not explicitly declared atomic, we can cast the pointer to atomic,
@@ -118,162 +87,652 @@ constexpr constexp_uvec3 __internal_workgroupsize_get()
*
* NOTE: We cannot hoist the address space into the template declaration, so these must be declared
* for each relevant address space. */
#define ATOMIC_OP_EX(qualifier, glsl_op, mtl_op) \
template<typename T> T atomic##glsl_op(qualifier T &mem, T data) \
{ \
return atomic_##mtl_op##_explicit((qualifier _atomic<T> *)&mem, data, memory_order_relaxed); \
}

/* Thread-group memory. */
template<typename T> T atomicMax(threadgroup T &mem, T data)
{
return atomic_fetch_max_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicMin(threadgroup T &mem, T data)
{
return atomic_fetch_min_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAdd(threadgroup T &mem, T data)
{
return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicSub(threadgroup T &mem, T data)
{
return atomic_fetch_sub_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(threadgroup T &mem, T data)
{
return atomic_fetch_and_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(threadgroup T &mem, T data)
{
return atomic_fetch_or_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicXor(threadgroup T &mem, T data)
{
return atomic_fetch_xor_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicExchange(threadgroup T &mem, T data)
{
return atomic_exchange_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed);
}
#define ATOMIC_OP(glsl_op, mtl_op) \
ATOMIC_OP_EX(threadgroup, glsl_op, mtl_op) \
ATOMIC_OP_EX(device, glsl_op, mtl_op)

/* Device memory. */
template<typename T> T atomicMax(device T &mem, T data)
{
return atomic_fetch_max_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicMin(device T &mem, T data)
{
return atomic_fetch_min_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAdd(device T &mem, T data)
{
return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicSub(device T &mem, T data)
{
return atomic_fetch_sub_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicAnd(device T &mem, T data)
{
return atomic_fetch_and_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicOr(device T &mem, T data)
{
return atomic_fetch_or_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicXor(device T &mem, T data)
{
return atomic_fetch_xor_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
template<typename T> T atomicExchange(device T &mem, T data)
{
return atomic_exchange_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed);
}
ATOMIC_OP(Max, fetch_max)
ATOMIC_OP(Min, fetch_min)
ATOMIC_OP(Add, fetch_add)
ATOMIC_OP(Sub, fetch_sub)
ATOMIC_OP(And, fetch_and)
ATOMIC_OP(Or, fetch_or)
ATOMIC_OP(Xor, fetch_xor)
ATOMIC_OP(Exchange, exchange)
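Each ATOMIC_OP line above stamps out the threadgroup and device overload pair that was previously written by hand; e.g. ATOMIC_OP(Add, fetch_add) expands to:

/* template<typename T> T atomicAdd(threadgroup T &mem, T data)
 * { return atomic_fetch_add_explicit((threadgroup _atomic<T> *)&mem, data, memory_order_relaxed); }
 * template<typename T> T atomicAdd(device T &mem, T data)
 * { return atomic_fetch_add_explicit((device _atomic<T> *)&mem, data, memory_order_relaxed); } */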

/* Used to replace 'out' in function parameters with thread-local reference
* shortened to avoid expanding the GLSL source string. */
#define THD thread
#define OUT(type, name, array) thread type(&name)[array]
#undef ATOMIC_OP

/* Generate wrapper structs for combined texture and sampler type. */
#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
# define SAMPLER_DECLARATION constant sampler *samp;
using sampler_ptr = constant sampler *;
#else
# define SAMPLER_DECLARATION thread sampler *samp;
using sampler_ptr = thread sampler *;
#endif

#define COMBINED_SAMPLER_TYPE(STRUCT_NAME, TEX_TYPE) \
template<typename T, access A = access::sample> struct STRUCT_NAME { \
thread TEX_TYPE<T, A> *texture; \
SAMPLER_DECLARATION \
/* Use point sampler instead of texture read to benefit from texture caching and reduce branching
* through removal of bounds tests, as these are handled by the sample operation. */
constexpr sampler _mtl_fetch_samp(address::clamp_to_zero, filter::nearest, coord::pixel);

template<typename T,
access A,
typename TextureT,
bool is_depth,
int Dim,
int Cube,
int Array,
bool Atomic = false>
struct _mtl_sampler {

template<typename U, int S>
using vec_or_scalar = typename metal::conditional<S == 1, U, vec<U, S>>::type;

using FltCoord = vec_or_scalar<float, Dim + Cube + Array>;
using FltDeriv = vec_or_scalar<float, Dim + Cube>;
using IntCoord = vec_or_scalar<int, Dim + Cube + Array>;
using IntDeriv = vec_or_scalar<int, Dim + Cube>;
using UintCoord = vec_or_scalar<uint, Dim + Cube + Array>;
using UintDeriv = vec_or_scalar<uint, Dim + Cube>;
using SizeVec = vec_or_scalar<int, Dim + Array>;
using DataVec = vec<T, 4>;
using AtomicT = T;

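A sketch of a concrete instantiation (parameters hypothetical): a sampled 2D array texture would be

/* _mtl_sampler<float, access::sample, texture2d_array<float>, false, 2, 0, 1>
 * giving FltCoord = float3 (uv + layer), FltDeriv = float2 and SizeVec = int3,
 * since Dim + Cube + Array = 3 and Dim + Cube = 2. */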
/* Template compatible gradient type choosing. */
template<int D, int C> struct gradient_n {};
/* clang-format off */
template<> struct gradient_n<2, 0> { using type = gradient2d; };
template<> struct gradient_n<3, 0> { using type = gradient3d; };
template<> struct gradient_n<2, 1> { using type = gradientcube; };
/* clang-format on */
/* Using `using` would invalidate the whole class. */
#define gradient typename gradient_n<Dim, Cube>::type

#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
/* If native texture atomics are unsupported, we instead utilize a custom type which wraps a
* buffer-backed texture. This texture will always be a Texture2D, but will emulate access to
* Texture3D and Texture2DArray by stacking layers.
* Access pattern will be derived based on the source type. 2DArray and 3D atomic texture
* support will require information on the size of each layer within the source 2D texture.
*
* A device pointer to the backing buffer will also be available for the atomic operations.
* NOTE: For atomic ops, it will only be valid to use access::read_write.
* We still need to use the wrapped type for access:sample, as texture2DArray and texture3D
* will require access indirection.
*
* NOTE: Only type of UINT is valid, but full template provided to match syntax of standard
* textures. */
template<typename U, int D, bool At> struct AtomicEmulation {};

template<typename U> struct AtomicEmulation<U, 2, true> {
/** Buffer to do atomic operations on. */
device U *buffer;
/* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater
* than the texture's native width to satisfy device alignment rules. We need to use the padded
* width when writing to ensure the correct writing location aligns with a given pixel location
* in the texture. */
ushort aligned_width;

int2 to_internal_coord(IntCoord coord) const
{
return coord.xy;
}
uint to_linear_coord(IntCoord coord) const
{
return coord.x + coord.y * aligned_width;
}
};

template<typename U> struct AtomicEmulation<U, 3, true> {
/** Buffer to do atomic operations on. */
device U *buffer;
/** Required to compute the pixel location inside the backing 2D texture space, and for texture size queries. */
ushort3 texture_size;
/* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater
* than the texture's native width to satisfy device alignment rules. We need to use the padded
* width when writing to ensure the correct writing location aligns with a given pixel location
* in the texture. */
ushort aligned_width;

int2 to_internal_coord(IntCoord coord) const
{
return int2(coord.x, coord.y + texture_size.y * coord.z);
}
uint to_linear_coord(IntCoord coord) const
{
uint row = coord.y + coord.z * texture_size.y;
return coord.x + row * aligned_width;
}
};

AtomicEmulation<T, Dim + Array, Atomic> atomic;
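A worked example of the layer-stacking arithmetic above, assuming texture_size.y = 16 and aligned_width = 256:

/* coord = (3, 5, 2)
 * to_internal_coord -> int2(3, 5 + 16 * 2) = int2(3, 37)
 * to_linear_coord   -> 3 + (5 + 2 * 16) * 256 = 9475 */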
#endif
|
||||
|
||||
thread TextureT *texture;
|
||||
sampler_ptr samp;
|
||||
|
||||
template<int dim, int array, bool At = false> SizeVec size_impl(uint lod) const {}
|
||||
template<> SizeVec size_impl<1, 0>(uint lod) const
|
||||
{
|
||||
return SizeVec(texture->get_width());
|
||||
}
|
||||
template<> SizeVec size_impl<1, 1>(uint lod) const
|
||||
{
|
||||
return SizeVec(texture->get_width(), texture->get_array_size());
|
||||
}
|
||||
template<> SizeVec size_impl<2, 0>(uint lod) const
|
||||
{
|
||||
return SizeVec(texture->get_width(lod), texture->get_height(lod));
|
||||
}
|
||||
template<> SizeVec size_impl<2, 1>(uint lod) const
|
||||
{
|
||||
return SizeVec(texture->get_width(lod), texture->get_height(lod), texture->get_array_size());
|
||||
}
|
||||
template<> SizeVec size_impl<3, 0>(uint lod) const
|
||||
{
|
||||
return SizeVec(texture->get_width(lod), texture->get_height(lod), texture->get_depth(lod));
|
||||
}
|
||||
#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
|
||||
template<> SizeVec size_impl<2, 1, true>(uint lod) const
|
||||
{
|
||||
return SizeVec(atomic.texture_size);
|
||||
}
|
||||
template<> SizeVec size_impl<3, 0, true>(uint lod) const
|
||||
{
|
||||
return SizeVec(atomic.texture_size);
|
||||
}
|
||||
#endif
|
||||
SizeVec size(int lod = 0) const
|
||||
{
|
||||
return size_impl<Dim, Array, Atomic>(uint(lod));
|
||||
}
|
||||
|
||||
/* If native texture atomics are unsupported, we instead utilize a custom type which wraps a
|
||||
* buffer-backed texture. This texture will always be a Texture2D, but will emulate access to
|
||||
* Texture3D and Texture2DArray by stacking layers.
|
||||
* Access pattern will be derived based on the source type. 2DArray and 3D atomic texture
|
||||
* support will require information on the size of each layer within the source 2D texture.
|
||||
*
|
||||
* A device pointer to the backing buffer will also be available for the atomic operations.
|
||||
* NOTE: For atomic ops, it will only be valid to use access::read_write.
|
||||
* We still need to use the wrapped type for access:sample, as texture2DArray and texture3D
|
||||
* will require access indirection.
|
||||
*
|
||||
* NOTE: Only type of UINT is valid, but full template provided to match syntax of standard
|
||||
* textures. */
|
||||
#define ARRAY_FN \
|
||||
template<int Ar = Array, bool At = Atomic, ENABLE_IF(Ar == 1), ENABLE_IF(At == false)>
|
||||
#define NON_ARRAY_FN \
|
||||
template<int Ar = Array, bool At = Atomic, ENABLE_IF(Ar == 0), ENABLE_IF(At == false)>
|
||||
|
||||
NON_ARRAY_FN DataVec sample(FltCoord coord) const
|
||||
{
|
||||
return texture->sample(*samp, coord);
|
||||
}
|
||||
NON_ARRAY_FN DataVec sample_grad(FltCoord coord, FltDeriv dPdx, FltDeriv dPdy) const
|
||||
{
|
||||
return texture->sample(*samp, coord, gradient(dPdx, dPdy));
|
||||
}
|
||||
NON_ARRAY_FN DataVec sample_bias(FltCoord coord, bias lod_bias) const
|
||||
{
|
||||
return texture->sample(*samp, coord, lod_bias);
|
||||
}
|
||||
NON_ARRAY_FN DataVec sample_lod(FltCoord coord, level lod, IntDeriv offset = {0}) const
|
||||
{
|
||||
return texture->sample(*samp, coord, lod, offset);
|
||||
}
|
||||
NON_ARRAY_FN DataVec gather(FltCoord coord) const
|
||||
{
|
||||
return texture->gather(*samp, coord);
|
||||
}
|
||||
NON_ARRAY_FN DataVec fetch(IntCoord coord) const
|
||||
{
|
||||
return texture->sample(_mtl_fetch_samp, FltCoord(coord));
|
||||
}
|
||||
NON_ARRAY_FN DataVec fetch(IntCoord coord, level lod, IntDeriv offset = {0}) const
|
||||
{
|
||||
return texture->sample(_mtl_fetch_samp, FltCoord(coord), lod, offset);
|
||||
}
|
||||
|
||||
ARRAY_FN DataVec sample(FltCoord coord) const
|
||||
{
|
||||
return texture->sample(*samp, uv_mask(coord), layer_mask(coord));
|
||||
}
|
||||
ARRAY_FN DataVec sample_grad(FltCoord coord, FltDeriv dPdx, FltDeriv dPdy) const
|
||||
{
|
||||
return texture->sample(*samp, uv_mask(coord), layer_mask(coord), gradient(dPdx, dPdy));
|
||||
}
|
||||
ARRAY_FN DataVec sample_bias(FltCoord coord, bias lod_bias) const
|
||||
{
|
||||
return texture->sample(*samp, uv_mask(coord), layer_mask(coord), lod_bias);
|
||||
}
|
||||
ARRAY_FN DataVec sample_lod(FltCoord coord, level lod, IntDeriv offset = {0}) const
|
||||
{
|
||||
return texture->sample(*samp, uv_mask(coord), layer_mask(coord), lod, offset);
|
||||
}
|
||||
ARRAY_FN DataVec gather(FltCoord coord) const
|
||||
{
|
||||
return texture->gather(*samp, uv_mask(coord), layer_mask(coord));
|
||||
}
|
||||
ARRAY_FN DataVec fetch(IntCoord coord) const
|
||||
{
|
||||
return texture->sample(_mtl_fetch_samp, uv_mask(coord), layer_mask(coord));
|
||||
}
|
||||
ARRAY_FN DataVec fetch(IntCoord coord, level lod, IntDeriv ofs = {0}) const
|
||||
{
|
||||
return texture->sample(_mtl_fetch_samp, uv_mask(coord), layer_mask(coord), lod, ofs);
|
||||
}
|
||||
|
||||
#undef gradient
|
||||
#undef ARRAY_FN
|
||||
#undef NON_ARRAY_FN
|
||||
|
||||
/**
|
||||
* Image functions.
|
||||
* To be split to its own class.
|
||||
*/
|
||||
|
||||
#define ARRAY_FN \
|
||||
template<int Ar = Array, bool At = Atomic, ENABLE_IF(Ar == 1), ENABLE_IF(At == false)>
|
||||
#define NON_ARRAY_FN \
|
||||
template<int Ar = Array, bool At = Atomic, ENABLE_IF(Ar == 0), ENABLE_IF(At == false)>

NON_ARRAY_FN DataVec load(IntCoord coord) const
{
  return texture->read(UintCoord(coord), 0);
}
NON_ARRAY_FN void store(DataVec data, IntCoord coord) const
{
  texture->write(data, UintCoord(coord), 0);
}

ARRAY_FN DataVec load(IntCoord coord) const
{
  return texture->read(uv_mask_img(coord), layer_mask_img(coord), 0);
}
ARRAY_FN void store(DataVec data, IntCoord coord) const
{
  texture->write(data, uv_mask_img(coord), layer_mask_img(coord), 0);
}

#undef ARRAY_FN
#undef NON_ARRAY_FN

#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
/* Atomic samplers only support `textureFetch` as the texture layout doesn't allow filtering. */

template<typename T, access A = access::sample>
struct _mtl_combined_image_sampler_2d_atomic_fallback {
  thread texture2d<T, A> *texture;
  SAMPLER_DECLARATION
  device T *buffer;
  /* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater
   * than the texture's native width to satisfy device alignment rules. We need to use the padded
   * width when writing to ensure the correct writing location aligns with a given pixel location
   * in the texture. */
  uint aligned_width;
};
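
/* Illustrative sketch only: `to_linear_coord` is not shown in this hunk, but given that
 * `aligned_width` is the buffer row pitch in elements, the linear buffer index for a 2D texel is
 * presumably computed along these lines:
 *
 *   uint to_linear_coord(int2 coord) const
 *   {
 *     return uint(coord.y) * aligned_width + uint(coord.x);
 *   }
 */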
# define ATOMIC_FN template<bool At = Atomic, ENABLE_IF(At == true)>

template<typename T, access A = access::sample>
struct _mtl_combined_image_sampler_2d_array_atomic_fallback {
  thread texture2d<T, A> *texture;
  SAMPLER_DECLARATION
  device T *buffer;
  /* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater
   * than the texture's native width to satisfy device alignment rules. We need to use the padded
   * width when writing to ensure the correct writing location aligns with a given pixel location
   * in the texture. */
  uint aligned_width;
  /* Texture size required to determine the location offset of an array layer within 2D texture
   * space. */
  ushort3 texture_size;
};
ATOMIC_FN DataVec fetch(IntCoord coord) const
{
  int2 coord_2d = atomic.to_internal_coord(coord);
  return texture->sample(_mtl_fetch_samp, float2(coord_2d));
}
ATOMIC_FN DataVec fetch(IntCoord coord, level lod, IntDeriv offset = {0}) const
{
  int2 coord_2d = atomic.to_internal_coord(coord);
  return texture->sample(_mtl_fetch_samp, float2(coord_2d), lod, offset);
}

template<typename T, access A = access::sample>
struct _mtl_combined_image_sampler_3d_atomic_fallback {
  thread texture2d<T, A> *texture;
  SAMPLER_DECLARATION
  device T *buffer;
  /* Aligned width matches the number of buffer elements in bytes_per_row. This may be greater
   * than the texture's native width to satisfy device alignment rules. We need to use the padded
   * width when writing to ensure the correct writing location aligns with a given pixel location
   * in the texture. */
  uint aligned_width;
  /* Texture size required to determine the location offset of an array layer within 2D texture
   * space. */
  ushort3 texture_size;
};
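
/* Illustrative sketch only: `to_internal_coord` is not shown in this hunk. Since the fallback
 * stores 2D-array/3D data inside one larger 2D texture, a plausible remap (assuming layers are
 * stacked vertically, using the stored `texture_size`) would look like:
 *
 *   int2 to_internal_coord(int3 coord) const
 *   {
 *     return int2(coord.x, coord.y + coord.z * int(texture_size.y));
 *   }
 */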
ATOMIC_FN DataVec load(IntCoord coord) const
{
  int2 coord_2d = atomic.to_internal_coord(coord);
  return texture->read(uint2(coord_2d), 0);
}
ATOMIC_FN void store(DataVec data, IntCoord coord) const
{
  int2 coord_2d = atomic.to_internal_coord(coord);
  texture->write(data, uint2(coord_2d), 0);
}

# undef ATOMIC_FN

AtomicT atomic_min(IntCoord coord, AtomicT data) const
{
  return atomicMin(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_max(IntCoord coord, AtomicT data) const
{
  return atomicMax(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_add(IntCoord coord, AtomicT data) const
{
  return atomicAdd(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_and(IntCoord coord, AtomicT data) const
{
  return atomicAnd(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_or(IntCoord coord, AtomicT data) const
{
  return atomicOr(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_xor(IntCoord coord, AtomicT data) const
{
  return atomicXor(atomic.buffer[atomic.to_linear_coord(coord)], data);
}
AtomicT atomic_exchange(IntCoord coord, AtomicT data) const
{
  return atomicExchange(atomic.buffer[atomic.to_linear_coord(coord)], data);
}

#else
# define NON_ARRAY_ATOMIC \
    template<typename U = T, \
             int Ar = Array, \
             ENABLE_IF(metal::is_integral_v<U> == true), \
             ENABLE_IF(sizeof(U) == 4), \
             ENABLE_IF(Ar == 0)>
# define ARRAY_ATOMIC \
    template<typename U = T, \
             int Ar = Array, \
             ENABLE_IF(metal::is_integral_v<U> == true), \
             ENABLE_IF(sizeof(U) == 4), \
             ENABLE_IF(Ar == 1)>

NON_ARRAY_ATOMIC AtomicT atomic_min(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_min(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_max(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_max(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_add(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_add(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_and(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_and(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_or(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_or(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_xor(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_xor(UintCoord(coord), data).x;
}
NON_ARRAY_ATOMIC AtomicT atomic_exchange(IntCoord coord, AtomicT data) const
{
  return texture->atomic_exchange(UintCoord(coord), data).x;
}

ARRAY_ATOMIC AtomicT atomic_min(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_min(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_max(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_max(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_add(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_add(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_and(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_and(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_or(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_or(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_xor(IntCoord coord, AtomicT data) const
{
  return texture->atomic_fetch_xor(uv_mask_img(coord), layer_mask_img(coord), data).x;
}
ARRAY_ATOMIC AtomicT atomic_exchange(IntCoord coord, AtomicT data) const
{
  return texture->atomic_exchange(uv_mask_img(coord), layer_mask_img(coord), data).x;
}

# undef NON_ARRAY_ATOMIC
# undef ARRAY_ATOMIC
#endif

void fence()
{
  texture->fence();
}

private:
template<typename U, typename V> static U reshape(V v) {}
/* clang-format off */
template<> float reshape<float>(float2 v) { return v.x; }
template<> float2 reshape<float2>(float3 v) { return v.xy; }
template<> float3 reshape<float3>(float4 v) { return v.xyz; }
template<> int reshape<int>(int2 v) { return v.x; }
template<> int2 reshape<int2>(int3 v) { return v.xy; }
template<> int3 reshape<int3>(int4 v) { return v.xyz; }
/* clang-format on */

FltDeriv uv_mask(FltCoord coord) const
{
  return reshape<FltDeriv>(coord);
}
FltDeriv uv_mask(IntCoord coord) const
{
  return FltDeriv(reshape<IntDeriv>(coord));
}

uint layer_mask(FltCoord coord) const
{
  return coord[Dim + Cube];
}
uint layer_mask(IntCoord coord) const
{
  return coord[Dim + Cube];
}

UintDeriv uv_mask_img(IntCoord coord) const
{
  return UintDeriv(reshape<IntDeriv>(coord));
}

uint layer_mask_img(IntCoord coord) const
{
  return coord[Dim + Cube];
}
};

/** Sampler functions */

#define SAMPLER_FN \
  template<typename SamplerT, \
           typename FltCoord = typename SamplerT::FltCoord, \
           typename FltDeriv = typename SamplerT::FltDeriv, \
           typename IntCoord = typename SamplerT::IntCoord, \
           typename IntDeriv = typename SamplerT::IntDeriv, \
           typename UintCoord = typename SamplerT::UintCoord, \
           typename UintDeriv = typename SamplerT::UintDeriv, \
           typename SizeVec = typename SamplerT::SizeVec, \
           typename DataVec = typename SamplerT::DataVec>

SAMPLER_FN SizeVec textureSize(SamplerT texture, int lod)
{
  return texture.size(lod);
}

SAMPLER_FN DataVec texture(SamplerT texture, FltCoord coord)
{
  return texture.sample(coord);
}

SAMPLER_FN DataVec texture(SamplerT texture, FltCoord coord, float lod_bias)
{
  return texture.sample_bias(coord, bias(lod_bias));
}

SAMPLER_FN DataVec textureLod(SamplerT texture, FltCoord coord, float lod)
{
  return texture.sample_lod(coord, level(lod));
}

SAMPLER_FN DataVec textureLodOffset(SamplerT texture, FltCoord coord, float lod, IntDeriv offset)
{
  return texture.sample_lod(coord, level(lod), offset);
}

SAMPLER_FN DataVec textureGather(SamplerT texture, FltCoord coord)
{
  return texture.gather(coord);
}

SAMPLER_FN DataVec textureGrad(SamplerT texture, FltCoord coord, FltDeriv dPdx, FltDeriv dPdy)
{
  return texture.sample_grad(coord, dPdx, dPdy);
}

SAMPLER_FN DataVec texelFetch(SamplerT texture, IntCoord coord, int lod)
{
  return texture.fetch(coord, level(lod));
}

SAMPLER_FN DataVec texelFetchOffset(SamplerT texture, IntCoord coord, int lod, IntDeriv offset)
{
  return texture.fetch(coord, level(lod), offset);
}

#undef SAMPLER_FN
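
/* Example: with the wrappers above, GLSL-style texture code compiles unchanged in MSL.
 * Hypothetical usage (the variable names are illustrative, not from this file):
 *
 *   sampler2D<float> tex = ...;
 *   float4 a = texture(tex, float2(0.5));          // -> tex.sample(coord)
 *   float4 b = textureLod(tex, float2(0.5), 2.0);  // -> tex.sample_lod(coord, level(2.0))
 *   float4 c = texelFetch(tex, int2(8, 8), 0);     // -> tex.fetch(coord, level(0))
 */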

/** Image functions */

#define IMAGE_FN \
  template<typename SamplerT, \
           typename IntCoord = typename SamplerT::IntCoord, \
           typename IntDeriv = typename SamplerT::IntDeriv, \
           typename UintCoord = typename SamplerT::UintCoord, \
           typename UintDeriv = typename SamplerT::UintDeriv, \
           typename SizeVec = typename SamplerT::SizeVec, \
           typename DataVec = typename SamplerT::DataVec, \
           typename AtomicT = typename SamplerT::AtomicT>

IMAGE_FN SizeVec imageSize(SamplerT texture)
{
  return texture.size();
}

IMAGE_FN void imageFence(SamplerT texture)
{
  return texture.fence();
}

IMAGE_FN DataVec imageLoad(SamplerT texture, IntCoord coord)
{
  if (any(UintCoord(coord) >= UintCoord(texture.size()))) {
    return DataVec(0);
  }
  return texture.load(coord);
}

IMAGE_FN DataVec imageLoadFast(SamplerT texture, IntCoord coord)
{
  return texture.load(coord);
}

IMAGE_FN void imageStore(SamplerT texture, IntCoord coord, DataVec data)
{
  if (any(UintCoord(coord) >= UintCoord(texture.size()))) {
    return;
  }
  texture.store(data, coord);
}

IMAGE_FN void imageStoreFast(SamplerT texture, IntCoord coord, DataVec data)
{
  texture.store(data, coord);
}

IMAGE_FN AtomicT imageAtomicMin(SamplerT texture, IntCoord coord, AtomicT data)
{
  return texture.atomic_min(coord, data);
}
IMAGE_FN AtomicT imageAtomicMax(SamplerT texture, IntCoord coord, AtomicT data)
{
  return texture.atomic_max(coord, data);
}
IMAGE_FN AtomicT imageAtomicAdd(SamplerT texture, IntCoord coord, AtomicT data)
{
  return texture.atomic_add(coord, data);
}
IMAGE_FN AtomicT imageAtomicAnd(SamplerT texture, IntCoord coord, AtomicT data)
{
  return texture.atomic_and(coord, data);
}
IMAGE_FN AtomicT imageAtomicOr(SamplerT texture, IntCoord coord, AtomicT data)
{
  return texture.atomic_or(coord, data);
}
IMAGE_FN AtomicT imageAtomicXor(SamplerT texture, IntCoord coord, AtomicT data)
{
  return texture.atomic_xor(coord, data);
}
IMAGE_FN AtomicT imageAtomicExchange(SamplerT texture, IntCoord coord, AtomicT data)
{
  return texture.atomic_exchange(coord, data);
}

#undef IMAGE_FN

/* Add any types as needed. */
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d, texture1d);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_1d_array, texture1d_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d, texture2d);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d, depth2d);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_2d_array, texture2d_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_2d_array, depth2d_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_3d, texture3d);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_buffer, texture_buffer);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube, texturecube);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_cube_array, texturecube_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube, texturecube_array);
COMBINED_SAMPLER_TYPE(_mtl_combined_image_sampler_depth_cube_array, texturecube_array);
#define TEMPLATE template<typename T = float, access A = access::sample>
TEMPLATE using depth2D = _mtl_sampler<T, A, depth2d<T, A>, true, 2, 0, 0>;
TEMPLATE using depth2DArray = _mtl_sampler<T, A, depth2d_array<T, A>, true, 2, 0, 1>;
TEMPLATE using depthCube = _mtl_sampler<T, A, texturecube<T, A>, true, 2, 1, 1>;
TEMPLATE using depthCubeArray = _mtl_sampler<T, A, texturecube_array<T, A>, true, 2, 1, 1>;
TEMPLATE using sampler1D = _mtl_sampler<T, A, texture1d<T, A>, false, 1, 0, 0>;
TEMPLATE using sampler1DArray = _mtl_sampler<T, A, texture1d_array<T, A>, false, 1, 0, 1>;
TEMPLATE using sampler2D = _mtl_sampler<T, A, texture2d<T, A>, false, 2, 0, 0>;
TEMPLATE using sampler2DArray = _mtl_sampler<T, A, texture2d_array<T, A>, false, 2, 0, 1>;
TEMPLATE using sampler3D = _mtl_sampler<T, A, texture3d<T, A>, false, 3, 0, 0>;
TEMPLATE using samplerBuffer = _mtl_sampler<T, A, texture_buffer<T, A>, false, 1, 0, 0>;
TEMPLATE using samplerCube = _mtl_sampler<T, A, texturecube<T, A>, false, 2, 1, 0>;
TEMPLATE using samplerCubeArray = _mtl_sampler<T, A, texturecube_array<T, A>, false, 2, 1, 1>;
/* Atomic textures are defined as 2D textures with a special layout for 3D texture emulation. */
TEMPLATE using sampler2DAtomic = _mtl_sampler<T, A, texture2d<T, A>, false, 2, 0, 0, true>;
TEMPLATE using sampler2DArrayAtomic = _mtl_sampler<T, A, texture2d<T, A>, false, 2, 0, 1, true>;
TEMPLATE using sampler3DAtomic = _mtl_sampler<T, A, texture2d<T, A>, false, 3, 0, 0, true>;

/* Used by the backend to declare the samplers. Could be removed. */
TEMPLATE using _mtl_sampler_depth_2d = depth2D<T, A>;
TEMPLATE using _mtl_sampler_depth_2d_array = depth2DArray<T, A>;
TEMPLATE using _mtl_sampler_depth_cube = depthCube<T, A>;
TEMPLATE using _mtl_sampler_depth_cube_array = depthCubeArray<T, A>;
TEMPLATE using _mtl_sampler_1d = sampler1D<T, A>;
TEMPLATE using _mtl_sampler_1d_array = sampler1DArray<T, A>;
TEMPLATE using _mtl_sampler_2d = sampler2D<T, A>;
TEMPLATE using _mtl_sampler_2d_array = sampler2DArray<T, A>;
TEMPLATE using _mtl_sampler_3d = sampler3D<T, A>;
TEMPLATE using _mtl_sampler_buffer = samplerBuffer<T, A>;
TEMPLATE using _mtl_sampler_cube = samplerCube<T, A>;
TEMPLATE using _mtl_sampler_cube_array = samplerCubeArray<T, A>;
TEMPLATE using _mtl_sampler_2d_atomic = sampler2DAtomic<T, A>;
TEMPLATE using _mtl_sampler_2d_array_atomic = sampler2DArrayAtomic<T, A>;
TEMPLATE using _mtl_sampler_3d_atomic = sampler3DAtomic<T, A>;
#undef TEMPLATE

/* Variant for 1D samplers. Discard the lod. */
template<typename T, access A>
typename sampler1D<T, A>::DataVec texelFetch(sampler1D<T, A> texture, int coord, int lod = 0)
{
  return texture.fetch(coord);
}

/* Variant for 1DArray samplers. Discard the lod. */
template<typename T, access A>
typename sampler1DArray<T, A>::DataVec texelFetch(sampler1DArray<T, A> texture,
                                                  int2 coord,
                                                  int lod = 0)
{
  return texture.fetch(coord);
}

/* Variant for buffer samplers. Discard the lod. */
template<typename T, access A>
typename samplerBuffer<T, A>::DataVec texelFetch(samplerBuffer<T, A> texture,
                                                 int coord,
                                                 int lod = 0)
{
  uint texel = uint(coord);
  if (texel < texture.texture->get_width()) {
    return texture.texture->read(texel);
  }
  return typename samplerBuffer<T, A>::DataVec(0);
}

/* Sampler struct for argument buffer. */
#ifdef USE_ARGUMENT_BUFFER_FOR_SAMPLERS
@@ -283,51 +742,51 @@ struct SStruct {
#endif

/* Samplers as function parameters. */
#define sampler1D thread _mtl_combined_image_sampler_1d<float>
#define sampler1DArray thread _mtl_combined_image_sampler_1d_array<float>
#define sampler2D thread _mtl_combined_image_sampler_2d<float>
#define depth2D thread _mtl_combined_image_sampler_depth_2d<float>
#define sampler2DArray thread _mtl_combined_image_sampler_2d_array<float>
#define sampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
#define depth2DArray thread _mtl_combined_image_sampler_depth_2d_array<float>
#define depth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<float>
#define sampler3D thread _mtl_combined_image_sampler_3d<float>
#define samplerBuffer thread _mtl_combined_image_sampler_buffer<float, access::read>
#define samplerCube thread _mtl_combined_image_sampler_cube<float>
#define samplerCubeArray thread _mtl_combined_image_sampler_cube_array<float>
#define sampler1D thread _mtl_sampler_1d<float>
#define sampler1DArray thread _mtl_sampler_1d_array<float>
#define sampler2D thread _mtl_sampler_2d<float>
#define depth2D thread _mtl_sampler_depth_2d<float>
#define sampler2DArray thread _mtl_sampler_2d_array<float>
#define sampler2DArrayShadow thread _mtl_sampler_depth_2d_array<float>
#define depth2DArray thread _mtl_sampler_depth_2d_array<float>
#define depth2DArrayShadow thread _mtl_sampler_depth_2d_array<float>
#define sampler3D thread _mtl_sampler_3d<float>
#define samplerBuffer thread _mtl_sampler_buffer<float, access::read>
#define samplerCube thread _mtl_sampler_cube<float>
#define samplerCubeArray thread _mtl_sampler_cube_array<float>

#define usampler1D thread _mtl_combined_image_sampler_1d<uint>
#define usampler1DArray thread _mtl_combined_image_sampler_1d_array<uint>
#define usampler2D thread _mtl_combined_image_sampler_2d<uint>
#define udepth2D thread _mtl_combined_image_sampler_depth_2d<uint>
#define usampler2DArray thread _mtl_combined_image_sampler_2d_array<uint>
#define usampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
#define udepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<uint>
#define usampler3D thread _mtl_combined_image_sampler_3d<uint>
#define usamplerBuffer thread _mtl_combined_image_sampler_buffer<uint, access::read>
#define usamplerCube thread _mtl_combined_image_sampler_cube<uint>
#define usamplerCubeArray thread _mtl_combined_image_sampler_cube_array<uint>
#define usampler1D thread _mtl_sampler_1d<uint>
#define usampler1DArray thread _mtl_sampler_1d_array<uint>
#define usampler2D thread _mtl_sampler_2d<uint>
#define udepth2D thread _mtl_sampler_depth_2d<uint>
#define usampler2DArray thread _mtl_sampler_2d_array<uint>
#define usampler2DArrayShadow thread _mtl_sampler_depth_2d_array<uint>
#define udepth2DArrayShadow thread _mtl_sampler_depth_2d_array<uint>
#define usampler3D thread _mtl_sampler_3d<uint>
#define usamplerBuffer thread _mtl_sampler_buffer<uint, access::read>
#define usamplerCube thread _mtl_sampler_cube<uint>
#define usamplerCubeArray thread _mtl_sampler_cube_array<uint>

#define isampler1D thread _mtl_combined_image_sampler_1d<int>
#define isampler1DArray thread _mtl_combined_image_sampler_1d_array<int>
#define isampler2D thread _mtl_combined_image_sampler_2d<int>
#define idepth2D thread _mtl_combined_image_sampler_depth_2d<int>
#define isampler2DArray thread _mtl_combined_image_sampler_2d_array<int>
#define isampler2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
#define idepth2DArrayShadow thread _mtl_combined_image_sampler_depth_2d_array<int>
#define isampler3D thread _mtl_combined_image_sampler_3d<int>
#define isamplerBuffer thread _mtl_combined_image_sampler_buffer<int, access::read>
#define isamplerCube thread _mtl_combined_image_sampler_cube<int>
#define isamplerCubeArray thread _mtl_combined_image_sampler_cube_array<int>
#define isampler1D thread _mtl_sampler_1d<int>
#define isampler1DArray thread _mtl_sampler_1d_array<int>
#define isampler2D thread _mtl_sampler_2d<int>
#define idepth2D thread _mtl_sampler_depth_2d<int>
#define isampler2DArray thread _mtl_sampler_2d_array<int>
#define isampler2DArrayShadow thread _mtl_sampler_depth_2d_array<int>
#define idepth2DArrayShadow thread _mtl_sampler_depth_2d_array<int>
#define isampler3D thread _mtl_sampler_3d<int>
#define isamplerBuffer thread _mtl_sampler_buffer<int, access::read>
#define isamplerCube thread _mtl_sampler_cube<int>
#define isamplerCubeArray thread _mtl_sampler_cube_array<int>

#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
/* If texture atomics are unsupported, map atomic types to internal atomic fallback type. */
# define usampler2DArrayAtomic _mtl_combined_image_sampler_2d_array_atomic_fallback<uint>
# define usampler2DAtomic _mtl_combined_image_sampler_2d_atomic_fallback<uint>
# define usampler3DAtomic _mtl_combined_image_sampler_3d_atomic_fallback<uint>
# define isampler2DArrayAtomic _mtl_combined_image_sampler_2d_array_atomic_fallback<int>
# define isampler2DAtomic _mtl_combined_image_sampler_2d_atomic_fallback<int>
# define isampler3DAtomic _mtl_combined_image_sampler_3d_atomic_fallback<int>
# define usampler2DArrayAtomic _mtl_sampler_2d_array_atomic<uint>
# define usampler2DAtomic _mtl_sampler_2d_atomic<uint>
# define usampler3DAtomic _mtl_sampler_3d_atomic<uint>
# define isampler2DArrayAtomic _mtl_sampler_2d_array_atomic<int>
# define isampler2DAtomic _mtl_sampler_2d_atomic<int>
# define isampler3DAtomic _mtl_sampler_3d_atomic<int>
#else
# define usampler2DArrayAtomic usampler2DArray
# define usampler2DAtomic usampler2D
@@ -340,1544 +799,32 @@ struct SStruct {
/* Vector accessor aliases. */
#define st xy

/* Texture functions. */
#define texelFetch _texelFetch_internal
#define texelFetchOffset(__tex, __texel, __lod, __offset) \
  _texelFetch_internal(__tex, __texel, __lod, __offset)
#define imageLoad(__image, __coord) _texelFetch_internal(__image, __coord, 0)
#define imageLoadFast(__image, __coord) _texelFetch_internal_fast(__image, __coord, 0)
#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv)
#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias)))
#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod)))
#define textureLodOffset(__tex, __uv, __lod, __offset) \
  _texture_internal_level(__tex, __uv, level(float(__lod)), __offset)
#define textureGather2(__tex, __uv) _texture_gather_internal(__tex, __uv, 0)
#define textureGather3(__tex, __uv, __comp) _texture_gather_internal(__tex, __uv, __comp)
#define textureGatherOffset(__tex, __offset, __uv, __comp) \
  _texture_gather_internal(__tex, __uv, __comp, __offset)
#define textureGrad(__tex, __uv, __dpdx, __dpdy) \
  _texture_grad_internal(__tex, __uv, __dpdx, __dpdy)

#define TEXURE_MACRO(_1, _2, _3, TEXNAME, ...) TEXNAME
#define texture(...) TEXURE_MACRO(__VA_ARGS__, texture3, texture2)(__VA_ARGS__)
#define textureGather(...) TEXURE_MACRO(__VA_ARGS__, textureGather3, textureGather2)(__VA_ARGS__)
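
/* The TEXURE_MACRO trick dispatches on argument count: the trailing overload names shift so that
 * the fourth argument selects the implementation. Illustrative expansion:
 *
 *   texture(tex, uv)       -> TEXURE_MACRO(tex, uv, texture3, texture2)(tex, uv)
 *                          -> texture2(tex, uv)       // 2-argument plain sampling
 *   texture(tex, uv, 1.5)  -> texture3(tex, uv, 1.5)  // 3-argument biased sampling
 */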

/* Texture-write functions. */
#define imageStore(_tex, _coord, _value) _texture_write_internal(_tex, _coord, _value)
#define imageStoreFast(_tex, _coord, _value) _texture_write_internal_fast(_tex, _coord, _value)

/* Texture synchronization functions. */
#define imageFence(image) image.texture->fence()

/* Singular return values from texture functions of type DEPTH are often indexed with either .r or
 * .x. This is a lightweight wrapper type for handling this syntax. */
union _msl_return_float {
  float r;
  float x;
  inline operator float() const
  {
    return r;
  }
};
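
/* Example: all GLSL-style spellings below read the same single depth value through the union
 * (illustrative usage, not from this file):
 *
 *   float d0 = texture(shadow_map, uv).r;
 *   float d1 = texture(shadow_map, uv).x;
 *   float d2 = texture(shadow_map, uv);  // implicit conversion via operator float()
 */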

/* Add custom texture sampling/reading routines for each type to account for special return cases,
 * e.g. returning a float with an `r` parameter. NOTE: Cannot use template specialization for the
 * input type, as return types are specific to the signature of `tex`. */

/* Use a point sampler instead of a texture read to benefit from texture caching and to reduce
 * branching through removal of bounds tests, as these are handled by the sample operation. */
constexpr sampler _point_sample_(address::clamp_to_zero, filter::nearest, coord::pixel);
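
/* With `address::clamp_to_zero`, an out-of-bounds fetch through `_point_sample_` simply returns
 * zero. That is why the sample-based fetch paths below need no explicit bounds test, while the
 * read-based variants further down must branch on the texture dimensions. */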

/* Texture Read via point sampling.
 * NOTE: These templates will evaluate first for texture resources bound with sample. */
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, access::sample> tex,
                                      T texel,
                                      uint lod = 0)
{
  return tex.texture->sample(_point_sample_, float(texel));
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, access::sample> tex,
                                      T texel,
                                      uint lod,
                                      T offset)
{
  return tex.texture->sample(_point_sample_, float(texel + offset));
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
    thread _mtl_combined_image_sampler_1d_array<S, access::sample> tex,
    vec<T, 2> texel,
    uint lod,
    vec<T, 2> offset = vec<T, 2>(0, 0))
{
  return tex.texture->sample(_point_sample_, float(texel.x + offset.x), uint(texel.y + offset.y));
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, access::sample> tex,
                                      vec<T, 2> texel,
                                      uint lod,
                                      vec<T, 2> offset = vec<T, 2>(0))
{
  return tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod));
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
    thread _mtl_combined_image_sampler_2d_array<S, access::sample> tex,
    vec<T, 3> texel,
    uint lod,
    vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->sample(
      _point_sample_, float2(texel.xy + offset.xy), uint(texel.z + offset.z), level(lod));
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, access::sample> tex,
                                      vec<T, 3> texel,
                                      uint lod,
                                      vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->sample(_point_sample_, float3(texel.xyz + offset.xyz), level(lod));
}

template<typename T>
inline _msl_return_float _texelFetch_internal(
    thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
    vec<T, 2> texel,
    uint lod,
    vec<T, 2> offset = vec<T, 2>(0))
{
  _msl_return_float fl = {
      tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod))};
  return fl;
}

template<typename S, typename T>
inline vec<S, 4> _texture_internal_samp(
    thread _mtl_combined_image_sampler_2d_array<S, access::sample> tex,
    vec<T, 3> texel,
    uint lod,
    vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->sample(
      _point_sample_, float2(texel.xy + offset.xy), uint(texel.z + offset.z), level(lod));
}

/* Texture Read via read operation. Required by compute/image-bindings. */
template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                      T texel,
                                      uint lod = 0)
{
  float w = tex.texture->get_width();
  if (texel >= 0 && texel < w) {
    return tex.texture->read(uint(texel));
  }
  else {
    return vec<S, 4>(0);
  }
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                           T texel,
                                           uint lod = 0)
{
  return tex.texture->read(uint(texel));
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
    const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel, uint lod = 0)
{
  float w = tex.texture->get_width();
  if (texel >= 0 && texel < w) {
    return tex.texture->read(uint(texel));
  }
  else {
    return vec<S, 4>(0);
  }
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal_fast(
    const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel, uint lod = 0)
{
  return tex.texture->read(uint(texel));
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                      T texel,
                                      uint lod,
                                      T offset)
{
  float w = tex.texture->get_width();
  if ((texel + offset) >= 0 && (texel + offset) < w) {
    /* LODs not supported for 1d textures. This must be zero. */
    return tex.texture->read(uint(texel + offset), 0);
  }
  else {
    return vec<S, 4>(0);
  }
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                           T texel,
                                           uint lod,
                                           T offset)
{
  /* LODs not supported for 1d textures. This must be zero. */
  return tex.texture->read(uint(texel + offset), 0);
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                      vec<T, 2> texel,
                                      uint lod,
                                      vec<T, 2> offset = vec<T, 2>(0, 0))
{
  float w = tex.texture->get_width();
  float h = tex.texture->get_array_size();
  if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
      (texel.y + offset.y) < h)
  {
    /* LODs not supported for 1d textures. This must be zero. */
    return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0);
  }
  else {
    return vec<S, 4>(0);
  }
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                           vec<T, 2> texel,
                                           uint lod,
                                           vec<T, 2> offset = vec<T, 2>(0, 0))
{
  /* LODs not supported for 1d textures. This must be zero. */
  return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0);
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                      vec<T, 2> texel,
                                      uint lod,
                                      vec<T, 2> offset = vec<T, 2>(0))
{
  float w = tex.texture->get_width() >> lod;
  float h = tex.texture->get_height() >> lod;
  if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
      (texel.y + offset.y) < h)
  {
    return tex.texture->read(uint2(texel + offset), lod);
  }
  else {
    return vec<S, 4>(0);
  }
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                           vec<T, 2> texel,
                                           uint lod,
                                           vec<T, 2> offset = vec<T, 2>(0))
{
  return tex.texture->read(uint2(texel + offset), lod);
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                      vec<T, 3> texel,
                                      uint lod,
                                      vec<T, 3> offset = vec<T, 3>(0))
{
  float w = tex.texture->get_width() >> lod;
  float h = tex.texture->get_height() >> lod;
  float d = tex.texture->get_array_size();
  if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
      (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d)
  {
    return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
  }
  else {
    return vec<S, 4>(0);
  }
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                           vec<T, 3> texel,
                                           uint lod,
                                           vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                      vec<T, 3> texel,
                                      uint lod,
                                      vec<T, 3> offset = vec<T, 3>(0))
{
  float w = tex.texture->get_width() >> lod;
  float h = tex.texture->get_height() >> lod;
  float d = tex.texture->get_depth() >> lod;
  if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
      (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d)
  {
    return tex.texture->read(uint3(texel + offset), lod);
  }
  else {
    return vec<S, 4>(0);
  }
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                           vec<T, 3> texel,
                                           uint lod,
                                           vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->read(uint3(texel + offset), lod);
}

template<typename T, access A>
inline _msl_return_float _texelFetch_internal(
    thread _mtl_combined_image_sampler_depth_2d<float, A> tex,
    vec<T, 2> texel,
    uint lod,
    vec<T, 2> offset = vec<T, 2>(0))
{
  float w = tex.texture->get_width() >> lod;
  float h = tex.texture->get_height() >> lod;
  if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
      (texel.y + offset.y) < h)
  {
    _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)};
    return fl;
  }
  else {
    _msl_return_float fl = {0};
    return fl;
  }
}

template<typename T, access A>
inline _msl_return_float _texelFetch_internal_fast(
    thread _mtl_combined_image_sampler_depth_2d<float, A> tex,
    vec<T, 2> texel,
    uint lod,
    vec<T, 2> offset = vec<T, 2>(0))
{
  _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)};
  return fl;
}

template<typename S, typename T, access A>
inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                        vec<T, 3> texel,
                                        uint lod,
                                        vec<T, 3> offset = vec<T, 3>(0))
{
  float w = tex.texture->get_width() >> lod;
  float h = tex.texture->get_height() >> lod;
  float d = tex.texture->get_array_size();
  if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
      (texel.y + offset.y) < h && (texel.z + offset.z) >= 0 && (texel.z + offset.z) < d)
  {
    return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
  }
  else {
    return vec<S, 4>(0);
  }
}

/* Sample. */
template<typename T>
inline vec<T, 4> _texture_internal_samp(
    thread _mtl_combined_image_sampler_1d<T, access::sample> tex, float u)
{
  return tex.texture->sample(*tex.samp, u);
}

inline float4 _texture_internal_samp(
    thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex, float2 ua)
{
  return tex.texture->sample(*tex.samp, ua.x, uint(ua.y));
}

inline int4 _texture_internal_samp(thread _mtl_combined_image_sampler_2d<int, access::sample> tex,
                                   float2 uv)
{
  return tex.texture->sample(*tex.samp, uv);
}

inline uint4 _texture_internal_samp(
    thread _mtl_combined_image_sampler_2d<uint, access::sample> tex, float2 uv)
{
  return tex.texture->sample(*tex.samp, uv);
}

inline float4 _texture_internal_samp(
    thread _mtl_combined_image_sampler_2d<float, access::sample> tex, float2 uv)
{
  return tex.texture->sample(*tex.samp, uv);
}

inline _msl_return_float _texture_internal_samp(
    thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex, float2 uv)
{
  _msl_return_float fl = {tex.texture->sample(*tex.samp, uv)};
  return fl;
}

template<typename T>
inline vec<T, 4> _texture_internal_samp(
    thread _mtl_combined_image_sampler_3d<T, access::sample> tex, float3 uvw)
{
  return tex.texture->sample(*tex.samp, uvw);
}

template<typename T>
inline vec<T, 4> _texture_internal_samp(
    thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex, float3 uva)
{
  return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z));
}

inline _msl_return_float _texture_internal_samp(
    thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float3 uva)
{
  _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z))};
  return fl;
}

inline _msl_return_float _texture_internal_samp(
    thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex, float4 uvac)
{
  _msl_return_float fl = {
      tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0))};
  return fl;
}

template<typename T>
inline vec<T, 4> _texture_internal_samp(
    thread _mtl_combined_image_sampler_cube<T, access::sample> tex, float3 uvs)
{
  return tex.texture->sample(*tex.samp, uvs.xyz);
}

template<typename T>
inline vec<T, 4> _texture_internal_samp(
    thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex, float4 coord_a)
{
  return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w));
}

/* Sample Level. */
template<typename T>
inline vec<T, 4> _texture_internal_level(
    thread _mtl_combined_image_sampler_1d<T, access::sample> tex,
    float u,
    level options,
    int offset = 0)
{
  /* LODs not supported for 1d textures. This must be zero. */
  return tex.texture->sample(*tex.samp, u);
}

inline float4 _texture_internal_level(
    thread _mtl_combined_image_sampler_1d_array<float, access::sample> tex,
    float2 ua,
    level options,
    int offset = 0)
{
  /* LODs not supported for 1d textures. This must be zero. */
  return tex.texture->sample(*tex.samp, ua.x, uint(ua.y));
}

inline int4 _texture_internal_level(thread _mtl_combined_image_sampler_2d<int, access::sample> tex,
                                    float2 uv,
                                    level options,
                                    int2 offset = int2(0))
{
  return tex.texture->sample(*tex.samp, uv, options, offset);
}

inline uint4 _texture_internal_level(
    thread _mtl_combined_image_sampler_2d<uint, access::sample> tex,
    float2 uv,
    level options,
    int2 offset = int2(0))
{
  return tex.texture->sample(*tex.samp, uv, options, offset);
}

inline float4 _texture_internal_level(
    thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
    float2 uv,
    level options,
    int2 offset = int2(0))
{
  return tex.texture->sample(*tex.samp, uv, options, offset);
}

inline _msl_return_float _texture_internal_level(
    thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
    float2 uv,
    level options,
    int2 offset = int2(0))
{
  _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)};
  return fl;
}

template<typename T>
inline vec<T, 4> _texture_internal_level(
    thread _mtl_combined_image_sampler_3d<T, access::sample> tex,
    float3 uvw,
    level options = level(0),
    int3 offset = int3(0))
{
  return tex.texture->sample(*tex.samp, uvw, options, offset);
}

template<typename T>
inline vec<T, 4> _texture_internal_level(
    thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex,
    float3 uva,
    level options = level(0),
    int2 offset = int2(0))
{
  return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset);
}

inline _msl_return_float _texture_internal_level(
    thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
    float3 uva,
    level options = level(0),
    int2 offset = int2(0))
{
  _msl_return_float fl = {tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), options, offset)};
  return fl;
}

inline _msl_return_float _texture_internal_level(
    thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
    float4 uvac,
    level options = level(0),
    int2 offset = int2(0))
{
  _msl_return_float fl = {
      tex.texture->sample_compare(*tex.samp, uvac.xy, uint(uvac.z), uvac.w, level(0), offset)};
  return fl;
}

template<typename T>
inline vec<T, 4> _texture_internal_level(
    thread _mtl_combined_image_sampler_cube<T, access::sample> tex,
    float3 uvs,
    level options = level(0),
    int2 offset = int2(0))
{
  return tex.texture->sample(*tex.samp, uvs.xyz, options);
}

template<typename T>
inline vec<T, 4> _texture_internal_level(
    thread _mtl_combined_image_sampler_cube_array<T, access::sample> tex,
    float4 coord_a,
    level options = level(0),
    int3 offset = int3(0))
{
  return tex.texture->sample(*tex.samp, coord_a.xyz, uint(coord_a.w), options);
}

/* Sample Bias. */
template<typename T>
inline vec<T, 4> _texture_internal_bias(
    thread _mtl_combined_image_sampler_1d<T, access::sample> tex,
    float u,
    bias options = bias(0.0),
    int offset = 0)
{
  return tex.texture->sample(*tex.samp, u);
}

inline float4 _texture_internal_bias(
    thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
    float2 uv,
    bias options = bias(0.0),
    int2 offset = int2(0))
{
  return tex.texture->sample(*tex.samp, uv, options, offset);
}

inline _msl_return_float _texture_internal_bias(
    thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
    float2 uv,
    bias options = bias(0),
    int2 offset = int2(0))
{
  _msl_return_float fl = {tex.texture->sample(*tex.samp, uv, options, offset)};
  return fl;
}

/* Texture Gather. */
component int_to_component(const int comp)
{
  switch (comp) {
    default:
    case 0:
      return component::x;
    case 1:
      return component::y;
    case 2:
      return component::z;
    case 3:
      return component::w;
  }
  return component::x;
}
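
/* Example: `int_to_component(2)` yields `component::z`, suitable for passing a runtime channel
 * index to MSL gather calls that take a `component` argument. Note that the gather wrappers below
 * currently ignore `comp` and always gather the default component. */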

inline float4 _texture_gather_internal(
    thread _mtl_combined_image_sampler_depth_2d<float, access::sample> tex,
    float2 uv,
    const int comp = 0,
    int2 offset = int2(0))
{
  return tex.texture->gather(*tex.samp, uv, offset);
}

inline float4 _texture_gather_internal(
    thread _mtl_combined_image_sampler_depth_2d_array<float, access::sample> tex,
    float3 uva,
    const int comp = 0,
    int2 offset = int2(0))
{
  return tex.texture->gather(*tex.samp, uva.xy, uint(uva.z), offset);
}

template<typename T>
inline vec<T, 4> _texture_gather_internal(
    thread _mtl_combined_image_sampler_2d<T, access::sample> tex,
    float2 uv,
    const int comp = 0,
    int2 offset = int2(0))
{
  return tex.texture->gather(*tex.samp, uv, offset);
}

template<typename T>
inline vec<T, 4> _texture_gather_internal(
    thread _mtl_combined_image_sampler_2d_array<T, access::sample> tex,
    float3 uva,
    const int comp = 0,
    int2 offset = int2(0))
{
  return tex.texture->gather(*tex.samp, uva.xy, uint(uva.z), offset);
}

/* Texture Grad. */
inline float4 _texture_grad_internal(
    thread _mtl_combined_image_sampler_2d<float, access::sample> tex,
    float2 uv,
    float2 dpdx,
    float2 dpdy)
{
  return tex.texture->sample(*tex.samp, uv, gradient2d(dpdx, dpdy));
}

inline float4 _texture_grad_internal(
    thread _mtl_combined_image_sampler_2d_array<float, access::sample> tex,
    float3 uva,
    float2 dpdx,
    float2 dpdy)
{
  return tex.texture->sample(*tex.samp, uva.xy, uint(uva.z), gradient2d(dpdx, dpdy));
}

inline float4 _texture_grad_internal(
    thread _mtl_combined_image_sampler_3d<float, access::sample> tex,
    float3 uvw,
    float3 dpdx,
    float3 dpdy)
{
  return tex.texture->sample(*tex.samp, uvw, gradient3d(dpdx, dpdy));
}

/* Texture write support. */
template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                    T _coord,
                                    vec<S, 4> value)
{
  float w = tex.texture->get_width();
  if (_coord >= 0 && _coord < w) {
    tex.texture->write(value, uint(_coord));
  }
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                         T _coord,
                                         vec<S, 4> value)
{
  tex.texture->write(value, uint(_coord));
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                         T _coord,
                                         S value)
{
  tex.texture->write(value, uint(_coord));
}

template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                    T _coord,
                                    vec<S, 4> value)
{
  float w = tex.texture->get_width();
  float h = tex.texture->get_height();
  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) {
    tex.texture->write(value, uint2(_coord.xy));
  }
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                         T _coord,
                                         vec<S, 4> value)
{
  tex.texture->write(value, uint2(_coord.xy));
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                         T _coord,
                                         S value)
{
  tex.texture->write(value, uint2(_coord.xy));
}

template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                    T _coord,
                                    vec<S, 4> value)
{
  float w = tex.texture->get_width();
  float h = tex.texture->get_height();
  float d = tex.texture->get_array_size();
  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
      _coord.z < d)
  {
    tex.texture->write(value, uint2(_coord.xy), _coord.z);
  }
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                         T _coord,
                                         vec<S, 4> value)
{
  tex.texture->write(value, uint2(_coord.xy), _coord.z);
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                         T _coord,
                                         S value)
{
  tex.texture->write(value, uint2(_coord.xy), _coord.z);
}

template<typename S, typename T, access A>
inline void _texture_write_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                    T _coord,
                                    vec<S, 4> value)
{
  float w = tex.texture->get_width();
  float h = tex.texture->get_height();
  float d = tex.texture->get_depth();
  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
      _coord.z < d)
  {
    tex.texture->write(value, uint3(_coord.xyz));
  }
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                         T _coord,
                                         vec<S, 4> value)
{
  tex.texture->write(value, uint3(_coord.xyz));
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                         T _coord,
                                         S value)
{
  tex.texture->write(value, uint3(_coord.xyz));
}

/* Texture atomic operations are only supported in Metal 3.1 and onward (macOS 14.0 Sonoma). */
#ifdef MTL_SUPPORTS_TEXTURE_ATOMICS

/* Image atomic operations. */
# define imageAtomicMin(tex, coord, data) _texture_image_atomic_min_internal(tex, coord, data)
# define imageAtomicAdd(tex, coord, data) _texture_image_atomic_add_internal(tex, coord, data)
# define imageAtomicExchange(tex, coord, data) \
    _texture_image_atomic_exchange_internal(tex, coord, data)
# define imageAtomicXor(tex, coord, data) _texture_image_atomic_xor_internal(tex, coord, data)
# define imageAtomicOr(tex, coord, data) _texture_image_atomic_or_internal(tex, coord, data)

/* Atomic OR. */
template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                    int coord,
                                    S data)
{
  return tex.texture->atomic_fetch_or(uint(coord), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                    int2 coord,
                                    S data)
{
  return tex.texture->atomic_fetch_or(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                    int2 coord,
                                    S data)
{
  return tex.texture->atomic_fetch_or(uint2(coord.xy), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                    int3 coord,
                                    S data)
{
  return tex.texture->atomic_fetch_or(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_or_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                    int3 coord,
                                    S data)
{
  return tex.texture->atomic_fetch_or(uint3(coord), vec<S, 4>(data)).x;
}

/* Atomic XOR. */
template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                     int coord,
                                     S data)
{
  return tex.texture->atomic_fetch_xor(uint(coord), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                     int2 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_xor(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                     int2 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_xor(uint2(coord.xy), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                     int3 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_xor(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_xor_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                     int3 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_xor(uint3(coord), vec<S, 4>(data)).x;
}

/* Atomic Min. */
template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                     int coord,
                                     S data)
{
  return tex.texture->atomic_fetch_min(uint(coord), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                     int2 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_min(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                     int2 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_min(uint2(coord.xy), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                     int3 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_min(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_min_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                     int3 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_min(uint3(coord), vec<S, 4>(data)).x;
}

/* Atomic Add. */
template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                     int coord,
                                     S data)
{
  return tex.texture->atomic_fetch_add(uint(coord), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                     int2 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_add(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                     int2 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_add(uint2(coord.xy), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                     int3 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_add(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_add_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                     int3 coord,
                                     S data)
{
  return tex.texture->atomic_fetch_add(uint3(coord), vec<S, 4>(data)).x;
}

/* Atomic Exchange. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
                                          int coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint(coord), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
                                          int2 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint(coord.x), uint(coord.y), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
                                          int2 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint2(coord.xy), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
                                          int3 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint2(coord.xy), uint(coord.z), vec<S, 4>(data)).x;
}

template<typename S, access A>
S _texture_image_atomic_exchange_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
                                          int3 coord,
                                          S data)
{
  return tex.texture->atomic_exchange(uint3(coord), vec<S, 4>(data)).x;
}

#else

/**
 * Texture atomic fallback function entry points.
 * NOTE: When texture atomics are unsupported, the wrapped type contains a buffer-backed 2D
 * texture. Atomic operations happen directly on the underlying buffer, and texture coordinates
 * are remapped into 2D texture space from 2D Array or 3D texture coordinates.
 */

/* Image atomic operations. */
# define imageAtomicMin(tex, coord, data) \
    _texture_image_atomic_min_internal_fallback(tex, coord, data)
# define imageAtomicAdd(tex, coord, data) \
    _texture_image_atomic_add_internal_fallback(tex, coord, data)
# define imageAtomicExchange(tex, coord, data) \
    _texture_image_atomic_exchange_internal_fallback(tex, coord, data)
# define imageAtomicXor(tex, coord, data) \
    _texture_image_atomic_xor_internal_fallback(tex, coord, data)
# define imageAtomicOr(tex, coord, data) \
    _texture_image_atomic_or_internal_fallback(tex, coord, data)
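
/* Usage sketch (illustrative only; the image name is hypothetical): a GLSL-style call such as
 *
 *   imageAtomicAdd(histogram_img, ivec2(x, y), 1u);
 *
 * resolves through the define above to
 *
 *   _texture_image_atomic_add_internal_fallback(histogram_img, ivec2(x, y), 1u);
 *
 * and overload resolution on the wrapped sampler type then picks the matching
 * 2D / 2D-array / 3D fallback implementation declared below. */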

/** Pixel address location remapping. */

/* Map 2D/3D texture coordinate into a linear pixel ID. */
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
                            uint2 coord)
{
  return (coord.x + coord.y * uint(tex.texture->get_width()));
}
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
                            int2 coord)
{
  return tex_coord_to_linear_px(tex, uint2(coord));
}

template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
                            uint3 coord)
{
  return (coord.x + coord.y * tex.texture_size.x +
          coord.z * (tex.texture_size.x * tex.texture_size.y));
}
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
                            int3 coord)
{
  return tex_coord_to_linear_px(tex, uint3(coord));
}

template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
                            uint3 coord)
{
  return (coord.x + coord.y * tex.texture_size.x +
          coord.z * (tex.texture_size.x * tex.texture_size.y));
}
template<typename S, access A>
uint tex_coord_to_linear_px(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
                            int3 coord)
{
  return tex_coord_to_linear_px(tex, uint3(coord));
}

/* Map 3D texture coordinate into 2D texture space. */
template<typename S, access A>
uint2 tex_coord_3d_to_2d(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
                         uint3 coord)
{
  uint linear_id = tex_coord_to_linear_px(tex, coord);
  uint tex_full_w = uint(tex.texture->get_width());
  uint2 out_2dcoord;
  out_2dcoord.y = linear_id / tex_full_w;
  out_2dcoord.x = linear_id - (out_2dcoord.y * tex_full_w);
  return out_2dcoord;
}

template<typename S, access A>
uint2 tex_coord_3d_to_2d(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
                         uint3 coord)
{
  uint linear_id = tex_coord_to_linear_px(tex, coord);
  uint tex_full_w = uint(tex.texture->get_width());
  uint2 out_2dcoord;
  out_2dcoord.y = linear_id / tex_full_w;
  out_2dcoord.x = linear_id - (out_2dcoord.y * tex_full_w);
  return out_2dcoord;
}
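
/* Worked example (illustrative only, hypothetical sizes): for a 4x4x4 fallback texture whose
 * backing 2D texture is 16 pixels wide, coord = (1, 2, 3) gives
 *
 *   linear_id = 1 + 2 * 4 + 3 * (4 * 4) = 57
 *   out_2dcoord = (57 - (57 / 16) * 16, 57 / 16) = (9, 3)
 *
 * i.e. voxels are stored in x-major order and wrapped row by row into the 2D texture. */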

template<int N> bool in_range(vec<int, N> value, vec<int, N> min, vec<int, N> max)
{
  return (all(value >= min) && all(value < max));
}

/* Map 2D/3D texture coordinate into buffer index, accounting for padded row widths. */
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
                                   uint2 coord)
{
  return (coord.x + coord.y * uint(tex.aligned_width));
}
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
                                   int2 coord)
{
  return tex_coord_to_linear_buffer_id(tex, uint2(coord));
}

template<typename S, access A>
uint tex_coord_to_linear_buffer_id(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, uint3 coord)
{
  uint2 coord2d = tex_coord_3d_to_2d(tex, coord);
  return (coord2d.x + coord2d.y * uint(tex.aligned_width));
}
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord)
{
  return tex_coord_to_linear_buffer_id(tex, uint3(coord));
}

template<typename S, access A>
uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
                                   uint3 coord)
{
  uint2 coord2d = tex_coord_3d_to_2d(tex, coord);
  return (coord2d.x + coord2d.y * uint(tex.aligned_width));
}
template<typename S, access A>
uint tex_coord_to_linear_buffer_id(thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
                                   int3 coord)
{
  return tex_coord_to_linear_buffer_id(tex, uint3(coord));
}
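
/* Note the distinction between the two mappings above (a sketch, assuming the semantics implied
 * by the field names): `tex_coord_to_linear_px` uses the logical texture width, while
 * `tex_coord_to_linear_buffer_id` uses `tex.aligned_width`, the padded row pitch of the backing
 * buffer. If a hypothetical 100-pixel-wide texture is padded to a 128-pixel row pitch, the
 * pixel at (2, 1) lives at buffer index 2 + 1 * 128 = 130, not 102. */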

/* imageAtomicMin. */

template<typename S, access A>
S _texture_image_atomic_min_internal_fallback(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
  if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
  {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicMin(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_min_internal_fallback(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicMin(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_min_internal_fallback(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicMin(tex.buffer[linear_id], data);
}

/* imageAtomicAdd. */
template<typename S, access A>
S _texture_image_atomic_add_internal_fallback(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
  if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
  {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicAdd(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_add_internal_fallback(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicAdd(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_add_internal_fallback(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicAdd(tex.buffer[linear_id], data);
}

/* imageAtomicExchange. */
template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallback(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
  if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
  {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicExchange(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallback(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicExchange(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_exchange_internal_fallback(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicExchange(tex.buffer[linear_id], data);
}

/* imageAtomicXor. */
template<typename S, access A>
S _texture_image_atomic_xor_internal_fallback(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
  if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
  {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicXor(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_xor_internal_fallback(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicXor(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_xor_internal_fallback(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicXor(tex.buffer[linear_id], data);
}

/* imageAtomicOr. */
template<typename S, access A>
S _texture_image_atomic_or_internal_fallback(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, int2 coord, S data)
{
  if (!in_range(coord.xy, int2(0, 0), int2(tex.texture->get_width(0), tex.texture->get_height(0))))
  {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicOr(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_or_internal_fallback(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicOr(tex.buffer[linear_id], data);
}

template<typename S, access A>
S _texture_image_atomic_or_internal_fallback(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, int3 coord, S data)
{
  if (!in_range(coord.xyz, int3(0, 0, 0), int3(tex.texture_size.xyz))) {
    return S(0);
  }
  uint linear_id = tex_coord_to_linear_buffer_id(tex, coord);
  return atomicOr(tex.buffer[linear_id], data);
}

/** Texture sampling, reading and writing functions with layer mapping. */

/* Texel Fetch. */
template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, access::sample> tex,
    vec<T, 2> texel,
    uint lod,
    vec<T, 2> offset = vec<T, 2>(0))
{
  return tex.texture->sample(_point_sample_, float2(texel.xy + offset.xy), level(lod));
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, access::sample> tex,
    vec<T, 3> texel,
    uint lod,
    vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->sample(
      _point_sample_, float2(tex_coord_3d_to_2d(tex, uint3(texel + offset))), level(lod));
}

template<typename S, typename T>
inline vec<S, 4> _texelFetch_internal(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, access::sample> tex,
    vec<T, 3> texel,
    uint lod,
    vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->sample(
      _point_sample_, float2(tex_coord_3d_to_2d(tex, uint3(texel + offset))), level(lod));
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex,
    vec<T, 2> texel,
    uint lod,
    vec<T, 2> offset = vec<T, 2>(0))
{
  float w = tex.texture->get_width() >> lod;
  float h = tex.texture->get_height() >> lod;
  if ((texel.x + offset.x) >= 0 && (texel.x + offset.x) < w && (texel.y + offset.y) >= 0 &&
      (texel.y + offset.y) < h)
  {
    return tex.texture->read(uint2(texel + offset), lod);
  }
  else {
    return vec<S, 4>(0);
  }
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
    vec<T, 3> texel,
    uint lod,
    vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->read(tex_coord_3d_to_2d(tex, uint3(texel + offset)), lod);
}

template<typename S, typename T, access A>
inline vec<S, 4> _texelFetch_internal(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex,
    vec<T, 3> texel,
    uint lod,
    vec<T, 3> offset = vec<T, 3>(0))
{
  return tex.texture->read(tex_coord_3d_to_2d(tex, uint3(texel + offset)), lod);
}

/* imageStore. */

template<typename S, typename T, access A>
inline void _texture_write_internal(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
{
  float w = tex.texture_size.x;
  float h = tex.texture_size.y;
  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h) {
    tex.texture->write(value, uint2(_coord.xy));
  }
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(
    thread _mtl_combined_image_sampler_2d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
{
  tex.texture->write(value, uint2(_coord.xy));
}

template<typename S, typename T, access A>
inline void _texture_write_internal(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
    T _coord,
    vec<S, 4> value)
{
  float w = tex.texture_size.x;
  float h = tex.texture_size.y;
  float d = tex.texture_size.z;
  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
      _coord.z < d)
  {
    tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
  }
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(
    thread _mtl_combined_image_sampler_2d_array_atomic_fallback<S, A> tex,
    T _coord,
    vec<S, 4> value)
{
  tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
}

template<typename S, typename T, access A>
inline void _texture_write_internal(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
{
  float w = tex.texture_size.x;
  float h = tex.texture_size.y;
  float d = tex.texture_size.z;
  if (_coord.x >= 0 && _coord.x < w && _coord.y >= 0 && _coord.y < h && _coord.z >= 0 &&
      _coord.z < d)
  {
    tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
  }
}

template<typename S, typename T, access A>
inline void _texture_write_internal_fast(
    thread _mtl_combined_image_sampler_3d_atomic_fallback<S, A> tex, T _coord, vec<S, 4> value)
{
  tex.texture->write(value, tex_coord_3d_to_2d(tex, uint3(_coord)));
}

#endif

/* Matrix compare operators. */
/** TODO(fclem): Template. */
inline bool operator==(float4x4 a, float4x4 b)
{
  for (int i = 0; i < 4; i++) {
    if (any(a[i] != b[i])) {
      return false;
    }
  }
  return true;
}
inline bool operator==(float3x3 a, float3x3 b)
{
  for (int i = 0; i < 3; i++) {
    if (any(a[i] != b[i])) {
      return false;
    }
  }
  return true;
}
inline bool operator==(float2x2 a, float2x2 b)
{
  for (int i = 0; i < 2; i++) {
    if (any(a[i] != b[i])) {
      return false;
    }
  }
  return true;
}
#define EQ_OP(type, ...) \
  inline bool operator==(type a, type b) \
  { \
    return __VA_ARGS__; \
  }
EQ_OP(float2x2, all(a[0] == b[0]) && all(a[1] == b[1]))
EQ_OP(float2x3, all(a[0] == b[0]) && all(a[1] == b[1]))
EQ_OP(float2x4, all(a[0] == b[0]) && all(a[1] == b[1]))
EQ_OP(float3x2, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
EQ_OP(float3x3, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
EQ_OP(float3x4, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]))
EQ_OP(float4x2, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
EQ_OP(float4x3, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
EQ_OP(float4x4, all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]) && all(a[3] == b[3]))
#undef EQ_OP
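
/* For reference, a sketch of what one instantiation above expands to (illustrative only):
 *
 *   inline bool operator==(float3x3 a, float3x3 b)
 *   {
 *     return all(a[0] == b[0]) && all(a[1] == b[1]) && all(a[2] == b[2]);
 *   }
 *
 * i.e. two matrices compare equal when every column compares equal component-wise. */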

inline bool operator!=(float4x4 a, float4x4 b)
{
  return !(a == b);
}
inline bool operator!=(float3x3 a, float3x3 b)
{
  return !(a == b);
}
inline bool operator!=(float2x2 a, float2x2 b)
{
  return !(a == b);
}
template<int N, int M> inline bool operator!=(matrix<float, N, M> a, matrix<float, N, M> b)
{
  return !(a == b);
}

/* Matrix unary minus operator. */

inline float4x4 operator-(float4x4 a)
{
  float4x4 b;
  for (int i = 0; i < 4; i++) {
    b[i] = -a[i];
  }
  return b;
}
inline float3x3 operator-(float3x3 a)
{
  float3x3 b;
  for (int i = 0; i < 3; i++) {
    b[i] = -a[i];
  }
  return b;
}
inline float2x2 operator-(float2x2 a)
{
  float2x2 b;
  for (int i = 0; i < 2; i++) {
    b[i] = -a[i];
  }
  return b;
}
template<int N, int M> inline matrix<float, N, M> operator-(matrix<float, N, M> a)
{
  return a * -1.0;
}

/* SSBO Vertex Fetch Mode. */

@@ -1954,115 +901,29 @@ inline float2x2 operator-(float2x2 a)

/* Common Functions. */
#define dFdx(x) dfdx(x)
#define dFdy(x) dfdy(x)
#define mod(x, y) _mtlmod(x, y)
#define discard discard_fragment()
#define inversesqrt rsqrt

inline float radians(float deg)
{
  /* Constant factor: M_PI_F/180.0. */
  return deg * 0.01745329251f;
}

inline float degrees(float rad)
{
  /* Constant factor: 180.0/M_PI_F. */
  return rad * 57.2957795131;
}

#define select(A, B, C) mix(A, B, C)

/* Type conversions and type truncation. */
inline float4 to_float4(float3 val)
{
  return float4(val, 1.0);
}

/* Type conversions and type truncation (Utility Functions). */
inline float3x3 mat4_to_mat3(float4x4 matrix)
{
  return float3x3(matrix[0].xyz, matrix[1].xyz, matrix[2].xyz);
}

inline int floatBitsToInt(float f)
{
  return as_type<int>(f);
}

inline int2 floatBitsToInt(float2 f)
{
  return as_type<int2>(f);
}

inline int3 floatBitsToInt(float3 f)
{
  return as_type<int3>(f);
}

inline int4 floatBitsToInt(float4 f)
{
  return as_type<int4>(f);
}

inline uint floatBitsToUint(float f)
{
  return as_type<uint>(f);
}

inline uint2 floatBitsToUint(float2 f)
{
  return as_type<uint2>(f);
}

inline uint3 floatBitsToUint(float3 f)
{
  return as_type<uint3>(f);
}

inline uint4 floatBitsToUint(float4 f)
{
  return as_type<uint4>(f);
}

inline float intBitsToFloat(int f)
{
  return as_type<float>(f);
}

inline float2 intBitsToFloat(int2 f)
{
  return as_type<float2>(f);
}

inline float3 intBitsToFloat(int3 f)
{
  return as_type<float3>(f);
}

inline float4 intBitsToFloat(int4 f)
{
  return as_type<float4>(f);
}

inline float uintBitsToFloat(uint f)
{
  return as_type<float>(f);
}

inline float2 uintBitsToFloat(uint2 f)
{
  return as_type<float2>(f);
}

inline float3 uintBitsToFloat(uint3 f)
{
  return as_type<float3>(f);
}

inline float4 uintBitsToFloat(uint4 f)
{
  return as_type<float4>(f);
}
/* clang-format off */
inline float radians(float deg) { return deg * 0.01745329251; /* M_PI_F / 180 */ }
inline float degrees(float rad) { return rad * 57.2957795131; /* 180 / M_PI_F */ }
inline int floatBitsToInt(float f) { return as_type<int>(f); }
inline int2 floatBitsToInt(float2 f) { return as_type<int2>(f); }
inline int3 floatBitsToInt(float3 f) { return as_type<int3>(f); }
inline int4 floatBitsToInt(float4 f) { return as_type<int4>(f); }
inline uint floatBitsToUint(float f) { return as_type<uint>(f); }
inline uint2 floatBitsToUint(float2 f) { return as_type<uint2>(f); }
inline uint3 floatBitsToUint(float3 f) { return as_type<uint3>(f); }
inline uint4 floatBitsToUint(float4 f) { return as_type<uint4>(f); }
inline float intBitsToFloat(int f) { return as_type<float>(f); }
inline float2 intBitsToFloat(int2 f) { return as_type<float2>(f); }
inline float3 intBitsToFloat(int3 f) { return as_type<float3>(f); }
inline float4 intBitsToFloat(int4 f) { return as_type<float4>(f); }
inline float uintBitsToFloat(uint f) { return as_type<float>(f); }
inline float2 uintBitsToFloat(uint2 f) { return as_type<float2>(f); }
inline float3 uintBitsToFloat(uint3 f) { return as_type<float3>(f); }
inline float4 uintBitsToFloat(uint4 f) { return as_type<float4>(f); }
/* clang-format on */
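
/* Usage sketch (illustrative): these bit casts reinterpret the value without numeric
 * conversion, matching GLSL semantics, so a round trip preserves the exact bit pattern:
 *
 *   uint bits = floatBitsToUint(1.0f);   0x3F800000
 *   float val = uintBitsToFloat(bits);   exactly 1.0f again
 */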

#define bitfieldReverse reverse_bits
#define bitfieldExtract extract_bits
@@ -2093,89 +954,32 @@ template<typename T> int findMSB(T x)
  return int(sizeof(T) * 8) - 1 - int(clz(x));
}
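
/* Worked example (illustrative): for a 32-bit `uint x = 8u` (binary 1000), `clz(x)` is 28,
 * so findMSB returns 32 - 1 - 28 = 3, the index of the most significant set bit. */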

#define unpackUnorm4x8 unpack_unorm4x8_to_float
#define unpackSnorm4x8 unpack_snorm4x8_to_float
#define unpackUnorm2x16 unpack_unorm2x16_to_float
#define unpackSnorm2x16 unpack_snorm2x16_to_float

/* Texture size functions. Add texture types as needed. */
#define imageSize(image) textureSize(image, 0)

template<typename T, access A>
int textureSize(thread _mtl_combined_image_sampler_1d<T, A> image, uint lod)
{
  return int(image.texture->get_width());
}

template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_1d_array<T, A> image, uint lod)
{
  return int2(image.texture->get_width(), image.texture->get_array_size());
}

template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_2d<T, A> image, uint lod)
{
  return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}

template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_depth_2d<T, A> image, uint lod)
{
  return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}

template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_2d_array<T, A> image, uint lod)
{
  return int3(image.texture->get_width(lod),
              image.texture->get_height(lod),
              image.texture->get_array_size());
}

template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_depth_2d_array<T, A> image, uint lod)
{
  return int3(image.texture->get_width(lod),
              image.texture->get_height(lod),
              image.texture->get_array_size());
}

template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_cube<T, A> image, uint lod)
{
  return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}

template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_3d<T, A> image, uint lod)
{
  return int3(image.texture->get_width(lod),
              image.texture->get_height(lod),
              image.texture->get_depth(lod));
}

#ifndef MTL_SUPPORTS_TEXTURE_ATOMICS
/* textureSize functions for fallback atomic textures. */
template<typename T, access A>
int2 textureSize(thread _mtl_combined_image_sampler_2d_atomic_fallback<T, A> image, uint lod)
{
  return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}
template<typename T, access A>
int2 textureSize(thread _mtl_sampler_2d_atomic<T, A> image, uint lod)
{
  return int2(image.texture->get_width(lod), image.texture->get_height(lod));
}

template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_2d_array_atomic_fallback<T, A> image, uint lod)
{
  return int3(image.texture_size);
}
template<typename T, access A>
int3 textureSize(thread _mtl_sampler_2d_array_atomic<T, A> image, uint lod)
{
  return int3(image.texture_size);
}

template<typename T, access A>
int3 textureSize(thread _mtl_combined_image_sampler_3d_atomic_fallback<T, A> image, uint lod)
{
  return int3(image.texture_size);
}
template<typename T, access A>
int3 textureSize(thread _mtl_sampler_3d_atomic<T, A> image, uint lod)
{
  return int3(image.texture_size);
}
#endif

#define unpackUnorm4x8 unpack_unorm4x8_to_float
#define unpackSnorm4x8 unpack_snorm4x8_to_float
#define unpackUnorm2x16 unpack_unorm2x16_to_float
#define unpackSnorm2x16 unpack_snorm2x16_to_float

/* Equality and comparison functions. */
#define lessThan(a, b) ((a) < (b))
#define lessThanEqual(a, b) ((a) <= (b))
@@ -2184,49 +988,18 @@ int3 textureSize(thread _mtl_combined_image_sampler_3d_atomic_fallback<T, A> ima
#define equal(a, b) ((a) == (b))
#define notEqual(a, b) ((a) != (b))

template<typename T, int n> bool all(vec<T, n> x)
{
  bool _all = true;
  for (int i = 0; i < n; i++) {
    _all = _all && (x[i] > 0);
  }
  return _all;
}

template<typename T, int n> bool any(vec<T, n> x)
{
  bool _any = false;
  for (int i = 0; i < n; i++) {
    _any = _any || (x[i] > 0);
  }
  return _any;
}

/* Modulo functionality. */
int _mtlmod(int a, int b)
{
  return a - b * (a / b);
}

float _mtlmod(float a, float b)
{
  return a - b * floor(a / b);
}

template<typename T, int n> vec<T, n> _mtlmod(vec<T, n> x, vec<T, n> y)
{
  return x - (y * floor(x / y));
}

template<typename T, int n, typename U> vec<T, n> _mtlmod(vec<T, n> x, U y)
{
  return x - (vec<T, n>(y) * floor(x / vec<T, n>(y)));
}

template<typename T, typename U, int n> vec<U, n> _mtlmod(T x, vec<U, n> y)
{
  return vec<U, n>(x) - (y * floor(vec<U, n>(x) / y));
}
/* Metal's `fmod(x, y)` is defined as `x - y * trunc(x / y)`, while GLSL's `mod(x, y)` is
 * defined as `x - y * floor(x / y)`. The results differ for negative operands, so we override
 * `mod` with a GLSL-compatible implementation. */
#define mod(x, y) _compatible_mod(x, y)
#define MOD \
  { \
    return x - y * floor(x / y); \
  }
float _compatible_mod(float x, float y) MOD;
template<int S> vec<float, S> _compatible_mod(vec<float, S> x, float y) MOD;
template<int S> vec<float, S> _compatible_mod(vec<float, S> x, vec<float, S> y) MOD;
#undef MOD
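
/* Behavior sketch (illustrative): the two definitions only differ for operands of mixed sign.
 *
 *   _compatible_mod(-1.0, 3.0) == -1.0 - 3.0 * floor(-1.0 / 3.0) == 2.0   (GLSL mod)
 *   fmod(-1.0, 3.0)            == -1.0                                    (Metal fmod)
 */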

/* Mathematical functions. */
template<typename T> T atan(T y, T x)
@@ -2249,10 +1022,7 @@ float4x4 inverse(float4x4 a)
  float b09 = a[2][1] * a[3][2] - a[2][2] * a[3][1];
  float b10 = a[2][1] * a[3][3] - a[2][3] * a[3][1];
  float b11 = a[2][2] * a[3][3] - a[2][3] * a[3][2];

  float4x4 adjoint;
  adjoint[0][0] = a[1][1] * b11 - a[1][2] * b10 + a[1][3] * b09;
  adjoint[0][1] = a[0][2] * b10 - a[0][1] * b11 - a[0][3] * b09;
  adjoint[0][2] = a[3][1] * b05 - a[3][2] * b04 + a[3][3] * b03;
@@ -2269,40 +1039,38 @@ float4x4 inverse(float4x4 a)
  adjoint[3][1] = a[0][0] * b09 - a[0][1] * b07 + a[0][2] * b06;
  adjoint[3][2] = a[3][1] * b01 - a[3][0] * b03 - a[3][2] * b00;
  adjoint[3][3] = a[2][0] * b03 - a[2][1] * b01 + a[2][2] * b00;

  float determinant = b00 * b11 - b01 * b10 + b02 * b09 + b03 * b08 - b04 * b07 + b05 * b06;
  /* Multiplying by inverse since matrix types don't have divide operators. */
  return adjoint * (1.0 / determinant);
}

float3x3 inverse(float3x3 m)
{
  float3x3 adjoint;
  adjoint[0][0] = +(m[1][1] * m[2][2] - m[2][1] * m[1][2]);
  adjoint[0][1] = -(m[0][1] * m[2][2] - m[2][1] * m[0][2]);
  adjoint[0][2] = +(m[0][1] * m[1][2] - m[1][1] * m[0][2]);
  adjoint[1][0] = -(m[1][0] * m[2][2] - m[2][0] * m[1][2]);
  adjoint[1][1] = +(m[0][0] * m[2][2] - m[2][0] * m[0][2]);
  adjoint[1][2] = -(m[0][0] * m[1][2] - m[1][0] * m[0][2]);
  adjoint[2][0] = +(m[1][0] * m[2][1] - m[2][0] * m[1][1]);
  adjoint[2][1] = -(m[0][0] * m[2][1] - m[2][0] * m[0][1]);
  adjoint[2][2] = +(m[0][0] * m[1][1] - m[1][0] * m[0][1]);

  float determinant = m[0][0] * adjoint[0][0] + m[1][0] * adjoint[0][1] + m[2][0] * adjoint[0][2];
  /* Multiplying by inverse since matrix types don't have divide operators. */
  return adjoint * (1.0 / determinant);
}

float2x2 inverse(float2x2 m)
{
  float2x2 adjoint;
  adjoint[0][0] = +m[1][1];
  adjoint[1][0] = -m[1][0];
  adjoint[0][1] = -m[0][1];
  adjoint[1][1] = +m[0][0];

  float determinant = m[0][0] * m[1][1] - m[1][0] * m[0][1];
  /* Multiplying by inverse since matrix types don't have divide operators. */
  return adjoint * (1.0 / determinant);
}
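
/* All of the inverse() overloads above use the adjugate identity (standard linear algebra,
 * stated here for reference): inverse(M) == adjugate(M) * (1 / determinant(M)). For the 2x2
 * case this is the familiar closed form, written here with the matrix given by rows:
 *
 *   inverse([[a, c], [b, d]]) == (1 / (a * d - b * c)) * [[d, -c], [-b, a]]
 */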

/* Additional overloads for builtin functions. */
@@ -2311,226 +1079,70 @@ float distance(float x, float y)
  return abs(y - x);
}

/* Overload for mix(A, B, float ratio). */
template<typename T, int Size> vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, float val)
{
  return mix(a, b, vec<T, Size>(val));
}

/* Overload for mix(A, B, bvec<N>). */
template<typename T, int Size>
vec<T, Size> mix(vec<T, Size> a, vec<T, Size> b, vec<int, Size> mask)
{
  vec<T, Size> result;
  for (int i = 0; i < Size; i++) {
    result[i] = mask[i] ? b[i] : a[i];
  }
  return result;
}

/* Using vec<bool, S> does not appear to work, splitting cases. */
/* Overload for mix(A, B, bvec<N>). */
template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bvec4 mask)
{
  vec<T, 4> result;
  for (int i = 0; i < 4; i++) {
    result[i] = mask[i] ? b[i] : a[i];
  }
  return result;
}

/* Overload for mix(A, B, bvec<N>). */
template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bvec3 mask)
{
  vec<T, 3> result;
  for (int i = 0; i < 3; i++) {
    result[i] = mask[i] ? b[i] : a[i];
  }
  return result;
}

/* Overload for mix(A, B, bvec<N>). */
template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bvec2 mask)
{
  vec<T, 2> result;
  for (int i = 0; i < 2; i++) {
    result[i] = mask[i] ? b[i] : a[i];
  }
  return result;
}

/* Overload for mix(A, B, bvec<N>). */
template<typename T> T mix(T a, T b, bool mask)
{
  return (mask) ? b : a;
}

/* Overload for mix(vec<T>, vec<T>, float). */
template<typename T, int S> vec<T, S> mix(vec<T, S> a, vec<T, S> b, float fac)
{
  return a * (1.0 - fac) + b * fac;
}

/* Overload for mix(A, B, bvec<N>). */
#define SELECT \
  { \
    return select(a, b, mask); \
  }
template<typename T> vec<T, 4> mix(vec<T, 4> a, vec<T, 4> b, bool4 mask) SELECT;
template<typename T> vec<T, 3> mix(vec<T, 3> a, vec<T, 3> b, bool3 mask) SELECT;
template<typename T> vec<T, 2> mix(vec<T, 2> a, vec<T, 2> b, bool2 mask) SELECT;
#undef SELECT
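
/* Usage sketch (illustrative): with a boolean mask, mix() selects per component, matching
 * GLSL's mix(genType, genType, genBType) overload:
 *
 *   float3 a = float3(0.0, 0.0, 0.0);
 *   float3 b = float3(1.0, 2.0, 3.0);
 *   float3 r = mix(a, b, bool3(true, false, true));   r == float3(1.0, 0.0, 3.0)
 */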

template<typename T, int Size> bool is_zero(vec<T, Size> a)
{
  for (int i = 0; i < Size; i++) {
    if (a[i] != T(0)) {
      return false;
    }
  }
  return true;
}
template<typename T, int Size> bool is_zero(vec<T, Size> a)
{
  return all(a == vec<T, Size>(0));
}

#define in
#define out thread
#define inout thread
#define _in_sta
#define _in_end
#define _out_sta (&
#define _out_end )
#define _inout_sta (&
#define _inout_end )

#define shared threadgroup
#define _shared_sta (&
#define _shared_end )

/* Defines generated by glsl_preprocess that contain threadgroup-variable related codegen.
 * See glsl_preprocess for more detail. */
#define MSL_SHARED_VARS_ARGS
#define MSL_SHARED_VARS_ASSIGN
#define MSL_SHARED_VARS_DECLARE
#define MSL_SHARED_VARS_PASS

/* Matrix reshaping functions. */
#define RESHAPE(mat_to, mat_from, ...) \
  mat_to to_##mat_to(mat_from m) \
  { \
    return mat_to(__VA_ARGS__); \
  }

/* clang-format off */
RESHAPE(float2x2, float3x3, m[0].xy, m[1].xy)
RESHAPE(float2x2, float4x4, m[0].xy, m[1].xy)
RESHAPE(float3x3, float4x4, m[0].xyz, m[1].xyz, m[2].xyz)
RESHAPE(float3x3, float2x2, m[0].x, m[0].y, 0, m[1].x, m[1].y, 0, 0, 0, 1)
RESHAPE(float4x4, float2x2, m[0].x, m[0].y, 0, 0, m[1].x, m[1].y, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1)
RESHAPE(float4x4, float3x3, m[0].x, m[0].y, m[0].z, 0, m[1].x, m[1].y, m[1].z, 0, m[2].x, m[2].y, m[2].z, 0, 0, 0, 0, 1)
/* clang-format on */
/* TODO(fclem): Remove. Use Transform instead. */
RESHAPE(float3x3, float3x4, m[0].xyz, m[1].xyz, m[2].xyz)
#undef RESHAPE

/**
 * Matrix conversion fallback for functional style casting & constructors.
 * To avoid name collision with the types, they are replaced with an uppercase version
 * before compilation.
 */

mat2 MAT2x2(vec2 a, vec2 b)
{
  return mat2(a, b);
}
mat2 MAT2x2(float a1, float a2, float b1, float b2)
{
  return mat2(vec2(a1, a2), vec2(b1, b2));
}
mat2 MAT2x2(float f)
{
  return mat2(f);
}
mat2 MAT2x2(mat3 m)
{
  return mat2(m[0].xy, m[1].xy);
}
mat2 MAT2x2(mat4 m)
{
  return mat2(m[0].xy, m[1].xy);
}

mat3 MAT3x3(vec3 a, vec3 b, vec3 c)
{
  return mat3(a, b, c);
}
mat3 MAT3x3(
    float a1, float a2, float a3, float b1, float b2, float b3, float c1, float c2, float c3)
{
  return mat3(vec3(a1, a2, a3), vec3(b1, b2, b3), vec3(c1, c2, c3));
}
mat3 MAT3x3(vec3 a, float b1, float b2, float b3, float c1, float c2, float c3)
{
  return mat3(a, vec3(b1, b2, b3), vec3(c1, c2, c3));
}
mat3 MAT3x3(float f)
{
  return mat3(f);
}
mat3 MAT3x3(mat4 m)
{
  return mat3(m[0].xyz, m[1].xyz, m[2].xyz);
}
mat3 MAT3x3(mat3x4 m)
{
  return mat3(m[0].xyz, m[1].xyz, m[2].xyz);
}
mat3 MAT3x3(mat2 m)
{
  return mat3(vec3(m[0].xy, 0.0), vec3(m[1].xy, 0.0), vec3(0.0, 0.0, 1.0));
}

mat4 MAT4x4(vec4 a, vec4 b, vec4 c, vec4 d)
{
  return mat4(a, b, c, d);
}
mat4 MAT4x4(float a1, float a2, float a3, float a4,
            float b1, float b2, float b3, float b4,
            float c1, float c2, float c3, float c4,
            float d1, float d2, float d3, float d4)
{
  return mat4(
      vec4(a1, a2, a3, a4), vec4(b1, b2, b3, b4), vec4(c1, c2, c3, c4), vec4(d1, d2, d3, d4));
}
mat4 MAT4x4(float f)
{
  return mat4(f);
}
mat4 MAT4x4(mat3 m)
{
  return mat4(
      vec4(m[0].xyz, 0.0), vec4(m[1].xyz, 0.0), vec4(m[2].xyz, 0.0), vec4(0.0, 0.0, 0.0, 1.0));
}
mat4 MAT4x4(mat3x4 m)
{
  return mat4(m[0], m[1], m[2], vec4(0.0, 0.0, 0.0, 1.0));
}
mat4 MAT4x4(mat4x3 m)
{
  return mat4(m[0][0], m[0][1], m[0][2], 0.0,
              m[1][0], m[1][1], m[1][2], 0.0,
              m[2][0], m[2][1], m[2][2], 0.0,
              m[3][0], m[3][1], m[3][2], 0.0);
}
mat4 MAT4x4(mat2 m)
{
  return mat4(vec4(m[0].xy, 0.0, 0.0),
              vec4(m[1].xy, 0.0, 0.0),
              vec4(0.0, 0.0, 1.0, 0.0),
              vec4(0.0, 0.0, 0.0, 1.0));
}

mat3x4 MAT3x4(vec4 a, vec4 b, vec4 c)
{
  return mat3x4(a, b, c);
}
mat3x4 MAT3x4(float a1, float a2, float a3, float a4,
              float b1, float b2, float b3, float b4,
              float c1, float c2, float c3, float c4)
{
  return mat3x4(vec4(a1, a2, a3, a4), vec4(b1, b2, b3, b4), vec4(c1, c2, c3, c4));
}
mat3x4 MAT3x4(float f)
{
  return mat3x4(f);
}
mat3x4 MAT3x4(mat3 m)
{
  return mat3x4(vec4(m[0].xyz, 0.0), vec4(m[1].xyz, 0.0), vec4(m[2].xyz, 0.0));
}
mat3x4 MAT3x4(mat2 m)
{
  return mat3x4(vec4(m[0].xy, 0.0, 0.0), vec4(m[1].xy, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0));
}

#define MAT2 MAT2x2
#define MAT3 MAT3x3
#define MAT4 MAT4x4
#undef ENABLE_IF
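
/* Expansion sketch (illustrative; `some_mat4` is a hypothetical variable): per the comment
 * above, functional-style matrix constructors are rewritten to their uppercase counterparts
 * before compilation, so a statement such as
 *
 *   mat3 m = MAT3(some_mat4);
 *
 * resolves to the MAT3x3(mat4 m) overload and stays valid C++ on the MSL side. */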

/* Array syntax compatibility. */
/* clang-format off */
@@ -2550,6 +1162,8 @@ mat3x4 MAT3x4(mat2 m)
#define bool2_array(...) { __VA_ARGS__ }
#define bool3_array(...) { __VA_ARGS__ }
#define bool4_array(...) { __VA_ARGS__ }
#define ARRAY_T(type)
#define ARRAY_V(...) {__VA_ARGS__}
/* clang-format on */
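
/* Expansion sketch (illustrative): on the MSL side the array helpers collapse to brace
 * initializers, which are valid C++, e.g.
 *
 *   bool2_array(bool2(true, false), bool2(false, true))
 *
 * becomes
 *
 *   { bool2(true, false), bool2(false, true) }
 *
 * while the GLSL definitions further below keep the bool2[](...) constructor syntax. */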

#define SHADER_LIBRARY_CREATE_INFO(a)

@@ -6,6 +6,26 @@
/** IMPORTANT: Be wary of size and alignment matching for types that are present
 * in C++ shared code. */

/* Matrix reshaping functions. These need to be declared before the matrix type aliases. */
#define RESHAPE(name, mat_to, mat_from) \
  mat_to to_##name(mat_from m) \
  { \
    return mat_to(m); \
  }

/* clang-format off */
RESHAPE(float2x2, mat2x2, mat3x3)
RESHAPE(float2x2, mat2x2, mat4x4)
RESHAPE(float3x3, mat3x3, mat4x4)
RESHAPE(float3x3, mat3x3, mat2x2)
RESHAPE(float4x4, mat4x4, mat2x2)
RESHAPE(float4x4, mat4x4, mat3x3)
/* clang-format on */
/* TODO(fclem): Remove. Use Transform instead. */
RESHAPE(float3x3, mat3x3, mat3x4)

#undef RESHAPE
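
/* Expansion sketch (illustrative): each RESHAPE line above declares a conversion function,
 * e.g. RESHAPE(float3x3, mat3x3, mat4x4) expands to
 *
 *   mat3x3 to_float3x3(mat4x4 m)
 *   {
 *     return mat3x3(m);
 *   }
 *
 * so GLSL keeps its native truncating matrix constructor while call sites use a function
 * name that is also definable in MSL and C++. */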

/* Booleans in GLSL are 32 bit in interface structs. */
#define bool32_t bool
#define bool2 bvec2
@@ -121,8 +141,19 @@ bool is_zero(vec4 A)
#define bool2_array bool2[]
#define bool3_array bool3[]
#define bool4_array bool4[]
#define ARRAY_T(type) type[]
#define ARRAY_V

#define SHADER_LIBRARY_CREATE_INFO(a)
#define VERTEX_SHADER_CREATE_INFO(a)
#define FRAGMENT_SHADER_CREATE_INFO(a)
#define COMPUTE_SHADER_CREATE_INFO(a)

#define _in_sta
#define _in_end
#define _out_sta
#define _out_end
#define _inout_sta
#define _inout_end
#define _shared_sta
#define _shared_end

@@ -70,22 +70,22 @@ void main()
  EulerXYZ euler = EulerXYZ(1, 2, 3);
  Quaternion quat = to_quaternion(euler);
  AxisAngle axis_angle = to_axis_angle(euler);
  m = mat4(from_rotation(euler));
  m = to_float4x4(from_rotation(euler));
  EXPECT_NEAR(m, expect, 1e-5);
  m = mat4(from_rotation(quat));
  m = to_float4x4(from_rotation(quat));
  EXPECT_NEAR(m, expect, 1e-5);
  m = mat4(from_rotation(axis_angle));
  m = to_float4x4(from_rotation(axis_angle));
  EXPECT_NEAR(m, expect, 3e-4); /* Has some precision issues on some platforms. */

  m = from_scale(vec4(1, 2, 3, 4));
  expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 3, 0), vec4(0, 0, 0, 4));
  EXPECT_TRUE(is_equal(m, expect, 0.00001));

  m = mat4(from_scale(vec3(1, 2, 3)));
  m = to_float4x4(from_scale(vec3(1, 2, 3)));
  expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 3, 0), vec4(0, 0, 0, 1));
  EXPECT_TRUE(is_equal(m, expect, 0.00001));

  m = mat4(from_scale(vec2(1, 2)));
  m = to_float4x4(from_scale(vec2(1, 2)));
  expect = mat4x4(vec4(1, 0, 0, 0), vec4(0, 2, 0, 0), vec4(0, 0, 1, 0), vec4(0, 0, 0, 1));
  EXPECT_TRUE(is_equal(m, expect, 0.00001));

@@ -182,8 +182,8 @@ void main()
  EulerXYZ eul;
  Quaternion qt;
  vec3 scale;
  to_rot_scale(mat3x3(m), eul, scale);
  to_rot_scale(mat3x3(m), qt, scale);
  to_rot_scale(to_float3x3(m), eul, scale);
  to_rot_scale(to_float3x3(m), qt, scale);
  EXPECT_NEAR(scale, expect_scale, 0.00001);
  EXPECT_NEAR(as_vec4(qt), as_vec4(expect_qt), 0.0002);
  EXPECT_NEAR(as_vec3(eul), as_vec3(expect_eul), 0.0002);

@@ -236,9 +236,6 @@ void VKDevice::init_glsl_patch()
    ss << "#define gpu_BaryCoordNoPersp gl_BaryCoordNoPerspEXT\n";
  }

  ss << "#define DFDX_SIGN 1.0\n";
  ss << "#define DFDY_SIGN 1.0\n";

  /* GLSL Backend Lib. */
  ss << datatoc_glsl_shader_defines_glsl;
  glsl_patch_ = ss.str();

@@ -1009,7 +1009,7 @@ static PyObject *pygpu_shader_create_from_info(BPyGPUShader * /*self*/, BPyGPUSh
    return nullptr;
  }

  GPUShader *shader = GPU_shader_create_from_info(o->info);
  GPUShader *shader = GPU_shader_create_from_info_python(o->info);
  if (!shader) {
    PyErr_SetString(PyExc_Exception, "Shader Compile Error, see console for more details");
    return nullptr;