This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation.

MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc.). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc.), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness.

The vast majority of deltas in this patch fall into one of two cases:
- Ensuring ccl_private is specified for thread-local pointer types
- Ensuring ccl_global is specified for device-wide pointer types

Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant.

In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture.

The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation.

Ref T92212

Reviewed By: brecht

Maniphest Tasks: T92212

Differential Revision: https://developer.blender.org/D12864
136 lines
4.0 KiB
C++
136 lines
4.0 KiB
C++
/*
|
|
* Copyright 2011-2018 Blender Foundation
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "kernel/bvh/bvh.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
#ifdef __SHADER_RAYTRACE__
|
|
|
|
# ifdef __KERNEL_OPTIX__
|
|
extern "C" __device__ float __direct_callable__svm_node_ao(INTEGRATOR_STATE_CONST_ARGS,
|
|
# else
|
|
ccl_device float svm_ao(INTEGRATOR_STATE_CONST_ARGS,
|
|
# endif
|
|
ccl_private ShaderData *sd,
|
|
float3 N,
|
|
float max_dist,
|
|
int num_samples,
|
|
int flags)
|
|
{
|
|
if (flags & NODE_AO_GLOBAL_RADIUS) {
|
|
max_dist = kernel_data.integrator.ao_bounces_distance;
|
|
}
|
|
|
|
/* Early out if no sampling needed. */
|
|
if (max_dist <= 0.0f || num_samples < 1 || sd->object == OBJECT_NONE) {
|
|
return 1.0f;
|
|
}
|
|
|
|
/* Can't raytrace from shaders like displacement, before BVH exists. */
|
|
if (kernel_data.bvh.bvh_layout == BVH_LAYOUT_NONE) {
|
|
return 1.0f;
|
|
}
|
|
|
|
if (flags & NODE_AO_INSIDE) {
|
|
N = -N;
|
|
}
|
|
|
|
float3 T, B;
|
|
make_orthonormals(N, &T, &B);
|
|
|
|
/* TODO: support ray-tracing in shadow shader evaluation? */
|
|
RNGState rng_state;
|
|
path_state_rng_load(INTEGRATOR_STATE_PASS, &rng_state);
|
|
|
|
int unoccluded = 0;
|
|
for (int sample = 0; sample < num_samples; sample++) {
|
|
float disk_u, disk_v;
|
|
path_branched_rng_2D(kg, &rng_state, sample, num_samples, PRNG_BEVEL_U, &disk_u, &disk_v);
|
|
|
|
float2 d = concentric_sample_disk(disk_u, disk_v);
|
|
float3 D = make_float3(d.x, d.y, safe_sqrtf(1.0f - dot(d, d)));
|
|
|
|
/* Create ray. */
|
|
Ray ray;
|
|
ray.P = ray_offset(sd->P, N);
|
|
ray.D = D.x * T + D.y * B + D.z * N;
|
|
ray.t = max_dist;
|
|
ray.time = sd->time;
|
|
ray.dP = differential_zero_compact();
|
|
ray.dD = differential_zero_compact();
|
|
|
|
if (flags & NODE_AO_ONLY_LOCAL) {
|
|
if (!scene_intersect_local(kg, &ray, NULL, sd->object, NULL, 0)) {
|
|
unoccluded++;
|
|
}
|
|
}
|
|
else {
|
|
Intersection isect;
|
|
if (!scene_intersect(kg, &ray, PATH_RAY_SHADOW_OPAQUE, &isect)) {
|
|
unoccluded++;
|
|
}
|
|
}
|
|
}
|
|
|
|
return ((float)unoccluded) / num_samples;
|
|
}
|
|
|
|
template<uint node_feature_mask>
|
|
# if defined(__KERNEL_OPTIX__)
|
|
ccl_device_inline
|
|
# else
|
|
ccl_device_noinline
|
|
# endif
|
|
void
|
|
svm_node_ao(INTEGRATOR_STATE_CONST_ARGS,
|
|
ccl_private ShaderData *sd,
|
|
ccl_private float *stack,
|
|
uint4 node)
|
|
{
|
|
uint flags, dist_offset, normal_offset, out_ao_offset;
|
|
svm_unpack_node_uchar4(node.y, &flags, &dist_offset, &normal_offset, &out_ao_offset);
|
|
|
|
uint color_offset, out_color_offset, samples;
|
|
svm_unpack_node_uchar3(node.z, &color_offset, &out_color_offset, &samples);
|
|
|
|
float dist = stack_load_float_default(stack, dist_offset, node.w);
|
|
float3 normal = stack_valid(normal_offset) ? stack_load_float3(stack, normal_offset) : sd->N;
|
|
|
|
float ao = 1.0f;
|
|
|
|
if (KERNEL_NODES_FEATURE(RAYTRACE)) {
|
|
# ifdef __KERNEL_OPTIX__
|
|
ao = optixDirectCall<float>(0, INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags);
|
|
# else
|
|
ao = svm_ao(INTEGRATOR_STATE_PASS, sd, normal, dist, samples, flags);
|
|
# endif
|
|
}
|
|
|
|
if (stack_valid(out_ao_offset)) {
|
|
stack_store_float(stack, out_ao_offset, ao);
|
|
}
|
|
|
|
if (stack_valid(out_color_offset)) {
|
|
float3 color = stack_load_float3(stack, color_offset);
|
|
stack_store_float3(stack, out_color_offset, ao * color);
|
|
}
|
|
}
|
|
|
|
#endif /* __SHADER_RAYTRACE__ */
|
|
|
|
CCL_NAMESPACE_END
|