Fix Cycles Metal build errors after recent changes
float8 is a reserved type in Metal, but is not implemented. So rename to float8_t for now. Also move back intersection handlers to kernel.metal, they can't be in the class that encapsulates the other Metal kernel functions.
This commit is contained in:
@@ -47,767 +47,6 @@ struct MetalRTIntersectionShadowPayload {
|
||||
bool result;
|
||||
};
|
||||
|
||||
/* Intersection return types. */
|
||||
|
||||
/* For a bounding box intersection function. */
|
||||
struct BoundingBoxIntersectionResult {
|
||||
bool accept [[accept_intersection]];
|
||||
bool continue_search [[continue_search]];
|
||||
float distance [[distance]];
|
||||
};
|
||||
|
||||
/* For a triangle intersection function. */
|
||||
struct TriangleIntersectionResult {
|
||||
bool accept [[accept_intersection]];
|
||||
bool continue_search [[continue_search]];
|
||||
};
|
||||
|
||||
enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX };
|
||||
|
||||
/* Utilities. */
|
||||
|
||||
ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self,
|
||||
const int object,
|
||||
const int prim)
|
||||
{
|
||||
return (self.prim == prim) && (self.object == object);
|
||||
}
|
||||
|
||||
ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self,
|
||||
const int object,
|
||||
const int prim)
|
||||
{
|
||||
return ((self.prim == prim) && (self.object == object)) ||
|
||||
((self.light_prim == prim) && (self.light_object == object));
|
||||
}
|
||||
|
||||
ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self,
|
||||
const int prim)
|
||||
{
|
||||
return (self.prim == prim);
|
||||
}
|
||||
|
||||
/* Hit functions. */
|
||||
|
||||
template<typename TReturn, uint intersection_type>
|
||||
TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
|
||||
const uint object,
|
||||
const uint primitive_id,
|
||||
const float2 barycentrics,
|
||||
const float ray_tmax)
|
||||
{
|
||||
TReturn result;
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
|
||||
if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) {
|
||||
/* Only intersect with matching object and skip self-intersecton. */
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
const short max_hits = payload.max_hits;
|
||||
if (max_hits == 0) {
|
||||
/* Special case for when no hit information is requested, just report that something was hit */
|
||||
payload.result = true;
|
||||
result.accept = true;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
int hit = 0;
|
||||
if (payload.has_lcg_state) {
|
||||
for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) {
|
||||
if (ray_tmax == payload.local_isect.hits[i].t) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
hit = payload.local_isect.num_hits++;
|
||||
|
||||
if (payload.local_isect.num_hits > max_hits) {
|
||||
hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits;
|
||||
if (hit >= max_hits) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
|
||||
/* Record closest intersection only. Do not terminate ray here, since there is no guarantee
|
||||
* about distance ordering in any-hit */
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
payload.local_isect.num_hits = 1;
|
||||
}
|
||||
|
||||
ray_data Intersection *isect = &payload.local_isect.hits[hit];
|
||||
isect->t = ray_tmax;
|
||||
isect->prim = prim;
|
||||
isect->object = object;
|
||||
isect->type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
isect->u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
isect->v = barycentrics.x;
|
||||
|
||||
/* Record geometric normal */
|
||||
const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
|
||||
const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0));
|
||||
const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1));
|
||||
const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2));
|
||||
payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
|
||||
|
||||
/* Continue tracing (without this the trace call would return after the first hit) */
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
|
||||
uint instance_id [[user_instance_id]],
|
||||
uint primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
{
|
||||
return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* unused function */
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.distance = ray_tmax;
|
||||
result.accept = false;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
template<uint intersection_type>
|
||||
bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
uint object,
|
||||
uint prim,
|
||||
const float2 barycentrics,
|
||||
const float ray_tmax)
|
||||
{
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = payload.visibility;
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
/* continue search */
|
||||
return true;
|
||||
}
|
||||
# endif
|
||||
|
||||
if (intersection_skip_self_shadow(payload.self, object, prim)) {
|
||||
/* continue search */
|
||||
return true;
|
||||
}
|
||||
|
||||
float u = 0.0f, v = 0.0f;
|
||||
int type = 0;
|
||||
if (intersection_type == METALRT_HIT_TRIANGLE) {
|
||||
u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
v = barycentrics.x;
|
||||
type = kernel_data_fetch(objects, object).primitive_type;
|
||||
}
|
||||
# ifdef __HAIR__
|
||||
else {
|
||||
u = barycentrics.x;
|
||||
v = barycentrics.y;
|
||||
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
type = segment.type;
|
||||
prim = segment.prim;
|
||||
|
||||
/* Filter out curve endcaps */
|
||||
if (u == 0.0f || u == 1.0f) {
|
||||
/* continue search */
|
||||
return true;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifndef __TRANSPARENT_SHADOWS__
|
||||
/* No transparent shadows support compiled in, make opaque. */
|
||||
payload.result = true;
|
||||
/* terminate ray */
|
||||
return false;
|
||||
# else
|
||||
short max_hits = payload.max_hits;
|
||||
short num_hits = payload.num_hits;
|
||||
short num_recorded_hits = payload.num_recorded_hits;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
|
||||
/* If no transparent shadows, all light is blocked and we can stop immediately. */
|
||||
if (num_hits >= max_hits ||
|
||||
!(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
|
||||
payload.result = true;
|
||||
/* terminate ray */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Always use baked shadow transparency for curves. */
|
||||
if (type & PRIMITIVE_CURVE) {
|
||||
float throughput = payload.throughput;
|
||||
throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u);
|
||||
payload.throughput = throughput;
|
||||
payload.num_hits += 1;
|
||||
|
||||
if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
|
||||
/* Accept result and terminate if throughput is sufficiently low */
|
||||
payload.result = true;
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
payload.num_hits += 1;
|
||||
payload.num_recorded_hits += 1;
|
||||
|
||||
uint record_index = num_recorded_hits;
|
||||
|
||||
const IntegratorShadowState state = payload.state;
|
||||
|
||||
const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE);
|
||||
if (record_index >= max_record_hits) {
|
||||
/* If maximum number of hits reached, find a hit to replace. */
|
||||
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
|
||||
uint max_recorded_hit = 0;
|
||||
|
||||
for (int i = 1; i < max_record_hits; i++) {
|
||||
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
|
||||
if (isect_t > max_recorded_t) {
|
||||
max_recorded_t = isect_t;
|
||||
max_recorded_hit = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (ray_tmax >= max_recorded_t) {
|
||||
/* Accept hit, so that we don't consider any more hits beyond the distance of the
|
||||
* current hit anymore. */
|
||||
payload.result = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
record_index = max_recorded_hit;
|
||||
}
|
||||
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
|
||||
|
||||
/* Continue tracing. */
|
||||
# endif /* __TRANSPARENT_SHADOWS__ */
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_shadow_all_hit_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
unsigned int object [[user_instance_id]],
|
||||
unsigned int primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
|
||||
TriangleIntersectionResult result;
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* unused function */
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.distance = ray_tmax;
|
||||
result.accept = false;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename TReturnType, uint intersection_type>
|
||||
inline TReturnType metalrt_visibility_test(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const float u)
|
||||
{
|
||||
TReturnType result;
|
||||
|
||||
#ifdef __HAIR__
|
||||
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
|
||||
/* Filter out curve endcaps. */
|
||||
if (u == 0.0f || u == 1.0f) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
uint visibility = payload.visibility;
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Shadow ray early termination. */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
|
||||
if (intersection_skip_self_shadow(payload.self, object, prim)) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
else {
|
||||
result.accept = true;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (intersection_skip_self(payload.self, object, prim)) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
result.accept = true;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_visibility_test_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
unsigned int object [[user_instance_id]],
|
||||
unsigned int primitive_id [[primitive_id]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
TriangleIntersectionResult result =
|
||||
metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, object, prim, 0.0f);
|
||||
if (result.accept) {
|
||||
payload.prim = prim;
|
||||
payload.type = kernel_data_fetch(objects, object).primitive_type;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* Unused function */
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Primitive intersection functions. */
|
||||
|
||||
#ifdef __HAIR__
|
||||
ccl_device_inline void metalrt_intersection_curve(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = payload.visibility;
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.curve_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
if (result.accept) {
|
||||
result.distance = isect.t;
|
||||
payload.u = isect.u;
|
||||
payload.v = isect.v;
|
||||
payload.prim = prim;
|
||||
payload.type = type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline void metalrt_intersection_curve_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
const uint visibility = payload.visibility;
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.curve_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
}
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
|
||||
metalrt_intersection_curve(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_ribbon_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
|
||||
metalrt_intersection_curve_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
metalrt_intersection_curve(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_all_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_curve_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* __HAIR__ */
|
||||
|
||||
#ifdef __POINTCLOUD__
|
||||
ccl_device_inline void metalrt_intersection_point(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = payload.visibility;
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.point_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
if (result.accept) {
|
||||
result.distance = isect.t;
|
||||
payload.u = isect.u;
|
||||
payload.v = isect.v;
|
||||
payload.prim = prim;
|
||||
payload.type = type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline void metalrt_intersection_point_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
const uint visibility = payload.visibility;
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.point_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
|
||||
if (result.accept) {
|
||||
result.distance = isect.t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const int type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_point(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
prim,
|
||||
type,
|
||||
ray_origin,
|
||||
ray_direction,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const int type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_point_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
prim,
|
||||
type,
|
||||
ray_origin,
|
||||
ray_direction,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* __POINTCLOUD__ */
|
||||
|
||||
/* Scene intersection. */
|
||||
|
||||
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
@@ -815,7 +54,7 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
const uint visibility,
|
||||
ccl_private Intersection *isect)
|
||||
{
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
if (!intersection_ray_valid(ray)) {
|
||||
isect->t = ray->tmax;
|
||||
isect->type = PRIMITIVE_NONE;
|
||||
return false;
|
||||
|
||||
@@ -1,9 +1,777 @@
|
||||
/* SPDX-License-Identifier: Apache-2.0
|
||||
* Copyright 2021-2022 Blender Foundation */
|
||||
|
||||
/* Metal kernel entry points */
|
||||
/* Metal kernel entry points. */
|
||||
|
||||
#include "kernel/device/metal/compat.h"
|
||||
#include "kernel/device/metal/globals.h"
|
||||
#include "kernel/device/metal/function_constants.h"
|
||||
#include "kernel/device/gpu/kernel.h"
|
||||
|
||||
/* MetalRT intersection handlers. */
|
||||
|
||||
#ifdef __METALRT__
|
||||
|
||||
/* Intersection return types. */
|
||||
|
||||
/* For a bounding box intersection function. */
|
||||
struct BoundingBoxIntersectionResult {
|
||||
bool accept [[accept_intersection]];
|
||||
bool continue_search [[continue_search]];
|
||||
float distance [[distance]];
|
||||
};
|
||||
|
||||
/* For a triangle intersection function. */
|
||||
struct TriangleIntersectionResult {
|
||||
bool accept [[accept_intersection]];
|
||||
bool continue_search [[continue_search]];
|
||||
};
|
||||
|
||||
enum { METALRT_HIT_TRIANGLE, METALRT_HIT_BOUNDING_BOX };
|
||||
|
||||
/* Utilities. */
|
||||
|
||||
ccl_device_inline bool intersection_skip_self(ray_data const RaySelfPrimitives &self,
|
||||
const int object,
|
||||
const int prim)
|
||||
{
|
||||
return (self.prim == prim) && (self.object == object);
|
||||
}
|
||||
|
||||
ccl_device_inline bool intersection_skip_self_shadow(ray_data const RaySelfPrimitives &self,
|
||||
const int object,
|
||||
const int prim)
|
||||
{
|
||||
return ((self.prim == prim) && (self.object == object)) ||
|
||||
((self.light_prim == prim) && (self.light_object == object));
|
||||
}
|
||||
|
||||
ccl_device_inline bool intersection_skip_self_local(ray_data const RaySelfPrimitives &self,
|
||||
const int prim)
|
||||
{
|
||||
return (self.prim == prim);
|
||||
}
|
||||
|
||||
/* Hit functions. */
|
||||
|
||||
template<typename TReturn, uint intersection_type>
|
||||
TReturn metalrt_local_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload,
|
||||
const uint object,
|
||||
const uint primitive_id,
|
||||
const float2 barycentrics,
|
||||
const float ray_tmax)
|
||||
{
|
||||
TReturn result;
|
||||
|
||||
#ifdef __BVH_LOCAL__
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
|
||||
if ((object != payload.local_object) || intersection_skip_self_local(payload.self, prim)) {
|
||||
/* Only intersect with matching object and skip self-intersecton. */
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
const short max_hits = payload.max_hits;
|
||||
if (max_hits == 0) {
|
||||
/* Special case for when no hit information is requested, just report that something was hit */
|
||||
payload.result = true;
|
||||
result.accept = true;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
int hit = 0;
|
||||
if (payload.has_lcg_state) {
|
||||
for (short i = min(max_hits, short(payload.local_isect.num_hits)) - 1; i >= 0; --i) {
|
||||
if (ray_tmax == payload.local_isect.hits[i].t) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
hit = payload.local_isect.num_hits++;
|
||||
|
||||
if (payload.local_isect.num_hits > max_hits) {
|
||||
hit = lcg_step_uint(&payload.lcg_state) % payload.local_isect.num_hits;
|
||||
if (hit >= max_hits) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (payload.local_isect.num_hits && ray_tmax > payload.local_isect.hits[0].t) {
|
||||
/* Record closest intersection only. Do not terminate ray here, since there is no guarantee
|
||||
* about distance ordering in any-hit */
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
payload.local_isect.num_hits = 1;
|
||||
}
|
||||
|
||||
ray_data Intersection *isect = &payload.local_isect.hits[hit];
|
||||
isect->t = ray_tmax;
|
||||
isect->prim = prim;
|
||||
isect->object = object;
|
||||
isect->type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
isect->u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
isect->v = barycentrics.x;
|
||||
|
||||
/* Record geometric normal */
|
||||
const uint tri_vindex = kernel_data_fetch(tri_vindex, isect->prim).w;
|
||||
const float3 tri_a = float3(kernel_data_fetch(tri_verts, tri_vindex + 0));
|
||||
const float3 tri_b = float3(kernel_data_fetch(tri_verts, tri_vindex + 1));
|
||||
const float3 tri_c = float3(kernel_data_fetch(tri_verts, tri_vindex + 2));
|
||||
payload.local_isect.Ng[hit] = normalize(cross(tri_b - tri_a, tri_c - tri_a));
|
||||
|
||||
/* Continue tracing (without this the trace call would return after the first hit) */
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
#endif
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionLocalPayload &payload [[payload]],
|
||||
uint instance_id [[user_instance_id]],
|
||||
uint primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
{
|
||||
return metalrt_local_hit<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, instance_id, primitive_id, barycentrics, ray_tmax);
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_local_hit_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* unused function */
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.distance = ray_tmax;
|
||||
result.accept = false;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
template<uint intersection_type>
|
||||
bool metalrt_shadow_all_hit(constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
uint object,
|
||||
uint prim,
|
||||
const float2 barycentrics,
|
||||
const float ray_tmax)
|
||||
{
|
||||
#ifdef __SHADOW_RECORD_ALL__
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = payload.visibility;
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
/* continue search */
|
||||
return true;
|
||||
}
|
||||
# endif
|
||||
|
||||
if (intersection_skip_self_shadow(payload.self, object, prim)) {
|
||||
/* continue search */
|
||||
return true;
|
||||
}
|
||||
|
||||
float u = 0.0f, v = 0.0f;
|
||||
int type = 0;
|
||||
if (intersection_type == METALRT_HIT_TRIANGLE) {
|
||||
u = 1.0f - barycentrics.y - barycentrics.x;
|
||||
v = barycentrics.x;
|
||||
type = kernel_data_fetch(objects, object).primitive_type;
|
||||
}
|
||||
# ifdef __HAIR__
|
||||
else {
|
||||
u = barycentrics.x;
|
||||
v = barycentrics.y;
|
||||
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
type = segment.type;
|
||||
prim = segment.prim;
|
||||
|
||||
/* Filter out curve endcaps */
|
||||
if (u == 0.0f || u == 1.0f) {
|
||||
/* continue search */
|
||||
return true;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifndef __TRANSPARENT_SHADOWS__
|
||||
/* No transparent shadows support compiled in, make opaque. */
|
||||
payload.result = true;
|
||||
/* terminate ray */
|
||||
return false;
|
||||
# else
|
||||
short max_hits = payload.max_hits;
|
||||
short num_hits = payload.num_hits;
|
||||
short num_recorded_hits = payload.num_recorded_hits;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
|
||||
/* If no transparent shadows, all light is blocked and we can stop immediately. */
|
||||
if (num_hits >= max_hits ||
|
||||
!(context.intersection_get_shader_flags(NULL, prim, type) & SD_HAS_TRANSPARENT_SHADOW)) {
|
||||
payload.result = true;
|
||||
/* terminate ray */
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Always use baked shadow transparency for curves. */
|
||||
if (type & PRIMITIVE_CURVE) {
|
||||
float throughput = payload.throughput;
|
||||
throughput *= context.intersection_curve_shadow_transparency(nullptr, object, prim, u);
|
||||
payload.throughput = throughput;
|
||||
payload.num_hits += 1;
|
||||
|
||||
if (throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
|
||||
/* Accept result and terminate if throughput is sufficiently low */
|
||||
payload.result = true;
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
payload.num_hits += 1;
|
||||
payload.num_recorded_hits += 1;
|
||||
|
||||
uint record_index = num_recorded_hits;
|
||||
|
||||
const IntegratorShadowState state = payload.state;
|
||||
|
||||
const uint max_record_hits = min(uint(max_hits), INTEGRATOR_SHADOW_ISECT_SIZE);
|
||||
if (record_index >= max_record_hits) {
|
||||
/* If maximum number of hits reached, find a hit to replace. */
|
||||
float max_recorded_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, 0, t);
|
||||
uint max_recorded_hit = 0;
|
||||
|
||||
for (int i = 1; i < max_record_hits; i++) {
|
||||
const float isect_t = INTEGRATOR_STATE_ARRAY(state, shadow_isect, i, t);
|
||||
if (isect_t > max_recorded_t) {
|
||||
max_recorded_t = isect_t;
|
||||
max_recorded_hit = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (ray_tmax >= max_recorded_t) {
|
||||
/* Accept hit, so that we don't consider any more hits beyond the distance of the
|
||||
* current hit anymore. */
|
||||
payload.result = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
record_index = max_recorded_hit;
|
||||
}
|
||||
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, u) = u;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, v) = v;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, t) = ray_tmax;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, prim) = prim;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, object) = object;
|
||||
INTEGRATOR_STATE_ARRAY_WRITE(state, shadow_isect, record_index, type) = type;
|
||||
|
||||
/* Continue tracing. */
|
||||
# endif /* __TRANSPARENT_SHADOWS__ */
|
||||
#endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_shadow_all_hit_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
unsigned int object [[user_instance_id]],
|
||||
unsigned int primitive_id [[primitive_id]],
|
||||
float2 barycentrics [[barycentric_coord]],
|
||||
float ray_tmax [[distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
|
||||
TriangleIntersectionResult result;
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, object, prim, barycentrics, ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_shadow_all_hit_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* unused function */
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.distance = ray_tmax;
|
||||
result.accept = false;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename TReturnType, uint intersection_type>
|
||||
inline TReturnType metalrt_visibility_test(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const float u)
|
||||
{
|
||||
TReturnType result;
|
||||
|
||||
#ifdef __HAIR__
|
||||
if (intersection_type == METALRT_HIT_BOUNDING_BOX) {
|
||||
/* Filter out curve endcaps. */
|
||||
if (u == 0.0f || u == 1.0f) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
uint visibility = payload.visibility;
|
||||
#ifdef __VISIBILITY_FLAG__
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Shadow ray early termination. */
|
||||
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
|
||||
if (intersection_skip_self_shadow(payload.self, object, prim)) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
else {
|
||||
result.accept = true;
|
||||
result.continue_search = false;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (intersection_skip_self(payload.self, object, prim)) {
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
result.accept = true;
|
||||
result.continue_search = true;
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(triangle, triangle_data, METALRT_TAGS)]] TriangleIntersectionResult
|
||||
__anyhit__cycles_metalrt_visibility_test_tri(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
unsigned int object [[user_instance_id]],
|
||||
unsigned int primitive_id [[primitive_id]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
TriangleIntersectionResult result =
|
||||
metalrt_visibility_test<TriangleIntersectionResult, METALRT_HIT_TRIANGLE>(
|
||||
launch_params_metal, payload, object, prim, 0.0f);
|
||||
if (result.accept) {
|
||||
payload.prim = prim;
|
||||
payload.type = kernel_data_fetch(objects, object).primitive_type;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__anyhit__cycles_metalrt_visibility_test_box(const float ray_tmax [[max_distance]])
|
||||
{
|
||||
/* Unused function */
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Primitive intersection functions. */
|
||||
|
||||
#ifdef __HAIR__
|
||||
ccl_device_inline void metalrt_intersection_curve(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = payload.visibility;
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.curve_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
if (result.accept) {
|
||||
result.distance = isect.t;
|
||||
payload.u = isect.u;
|
||||
payload.v = isect.v;
|
||||
payload.prim = prim;
|
||||
payload.type = type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline void metalrt_intersection_curve_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
const uint visibility = payload.visibility;
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.curve_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
}
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_ribbon(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
|
||||
metalrt_intersection_curve(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_ribbon_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
if (segment.type & PRIMITIVE_CURVE_RIBBON) {
|
||||
metalrt_intersection_curve_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_all(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
metalrt_intersection_curve(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__curve_all_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_P [[origin]],
|
||||
const float3 ray_D [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, prim);
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_curve_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
segment.prim,
|
||||
segment.type,
|
||||
ray_P,
|
||||
ray_D,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* __HAIR__ */
|
||||
|
||||
#ifdef __POINTCLOUD__
|
||||
ccl_device_inline void metalrt_intersection_point(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
# ifdef __VISIBILITY_FLAG__
|
||||
const uint visibility = payload.visibility;
|
||||
if ((kernel_data_fetch(objects, object).visibility & visibility) == 0) {
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.point_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result = metalrt_visibility_test<BoundingBoxIntersectionResult, METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, isect.u);
|
||||
if (result.accept) {
|
||||
result.distance = isect.t;
|
||||
payload.u = isect.u;
|
||||
payload.v = isect.v;
|
||||
payload.prim = prim;
|
||||
payload.type = type;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline void metalrt_intersection_point_shadow(
|
||||
constant KernelParamsMetal &launch_params_metal,
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
|
||||
const uint object,
|
||||
const uint prim,
|
||||
const uint type,
|
||||
const float3 ray_P,
|
||||
const float3 ray_D,
|
||||
float time,
|
||||
const float ray_tmin,
|
||||
const float ray_tmax,
|
||||
thread BoundingBoxIntersectionResult &result)
|
||||
{
|
||||
const uint visibility = payload.visibility;
|
||||
|
||||
Intersection isect;
|
||||
isect.t = ray_tmax;
|
||||
|
||||
MetalKernelContext context(launch_params_metal);
|
||||
if (context.point_intersect(
|
||||
NULL, &isect, ray_P, ray_D, ray_tmin, isect.t, object, prim, time, type)) {
|
||||
result.continue_search = metalrt_shadow_all_hit<METALRT_HIT_BOUNDING_BOX>(
|
||||
launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
|
||||
result.accept = !result.continue_search;
|
||||
|
||||
if (result.accept) {
|
||||
result.distance = isect.t;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const int type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_point(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
prim,
|
||||
type,
|
||||
ray_origin,
|
||||
ray_direction,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
[[intersection(bounding_box, triangle_data, METALRT_TAGS)]] BoundingBoxIntersectionResult
|
||||
__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
|
||||
ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload
|
||||
[[payload]],
|
||||
const uint object [[user_instance_id]],
|
||||
const uint primitive_id [[primitive_id]],
|
||||
const float3 ray_origin [[origin]],
|
||||
const float3 ray_direction [[direction]],
|
||||
const float ray_tmin [[min_distance]],
|
||||
const float ray_tmax [[max_distance]])
|
||||
{
|
||||
const uint prim = primitive_id + kernel_data_fetch(object_prim_offset, object);
|
||||
const int type = kernel_data_fetch(objects, object).primitive_type;
|
||||
|
||||
BoundingBoxIntersectionResult result;
|
||||
result.accept = false;
|
||||
result.continue_search = true;
|
||||
result.distance = ray_tmax;
|
||||
|
||||
metalrt_intersection_point_shadow(launch_params_metal,
|
||||
payload,
|
||||
object,
|
||||
prim,
|
||||
type,
|
||||
ray_origin,
|
||||
ray_direction,
|
||||
# if defined(__METALRT_MOTION__)
|
||||
payload.time,
|
||||
# else
|
||||
0.0f,
|
||||
# endif
|
||||
ray_tmin,
|
||||
ray_tmax,
|
||||
result);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* __POINTCLOUD__ */
|
||||
#endif /* __METALRT__ */
|
||||
|
||||
@@ -14,187 +14,187 @@ CCL_NAMESPACE_BEGIN
|
||||
* Declaration.
|
||||
*/
|
||||
|
||||
ccl_device_inline float8 operator+(const float8 &a, const float8 &b);
|
||||
ccl_device_inline float8 operator+(const float8 &a, const float f);
|
||||
ccl_device_inline float8 operator+(const float f, const float8 &a);
|
||||
ccl_device_inline float8_t operator+(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator+(const float8_t a, const float f);
|
||||
ccl_device_inline float8_t operator+(const float f, const float8_t a);
|
||||
|
||||
ccl_device_inline float8 operator-(const float8 &a);
|
||||
ccl_device_inline float8 operator-(const float8 &a, const float8 &b);
|
||||
ccl_device_inline float8 operator-(const float8 &a, const float f);
|
||||
ccl_device_inline float8 operator-(const float f, const float8 &a);
|
||||
ccl_device_inline float8_t operator-(const float8_t a);
|
||||
ccl_device_inline float8_t operator-(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator-(const float8_t a, const float f);
|
||||
ccl_device_inline float8_t operator-(const float f, const float8_t a);
|
||||
|
||||
ccl_device_inline float8 operator*(const float8 &a, const float8 &b);
|
||||
ccl_device_inline float8 operator*(const float8 &a, const float f);
|
||||
ccl_device_inline float8 operator*(const float f, const float8 &a);
|
||||
ccl_device_inline float8_t operator*(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator*(const float8_t a, const float f);
|
||||
ccl_device_inline float8_t operator*(const float f, const float8_t a);
|
||||
|
||||
ccl_device_inline float8 operator/(const float8 &a, const float8 &b);
|
||||
ccl_device_inline float8 operator/(const float8 &a, float f);
|
||||
ccl_device_inline float8 operator/(const float f, const float8 &a);
|
||||
ccl_device_inline float8_t operator/(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator/(const float8_t a, float f);
|
||||
ccl_device_inline float8_t operator/(const float f, const float8_t a);
|
||||
|
||||
ccl_device_inline float8 operator+=(float8 &a, const float8 &b);
|
||||
ccl_device_inline float8_t operator+=(float8_t a, const float8_t b);
|
||||
|
||||
ccl_device_inline float8 operator*=(float8 &a, const float8 &b);
|
||||
ccl_device_inline float8 operator*=(float8 &a, float f);
|
||||
ccl_device_inline float8_t operator*=(float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t operator*=(float8_t a, float f);
|
||||
|
||||
ccl_device_inline float8 operator/=(float8 &a, float f);
|
||||
ccl_device_inline float8_t operator/=(float8_t a, float f);
|
||||
|
||||
ccl_device_inline bool operator==(const float8 &a, const float8 &b);
|
||||
ccl_device_inline bool operator==(const float8_t a, const float8_t b);
|
||||
|
||||
ccl_device_inline float8 rcp(const float8 &a);
|
||||
ccl_device_inline float8 sqrt(const float8 &a);
|
||||
ccl_device_inline float8 sqr(const float8 &a);
|
||||
ccl_device_inline bool is_zero(const float8 &a);
|
||||
ccl_device_inline float average(const float8 &a);
|
||||
ccl_device_inline float8 min(const float8 &a, const float8 &b);
|
||||
ccl_device_inline float8 max(const float8 &a, const float8 &b);
|
||||
ccl_device_inline float8 clamp(const float8 &a, const float8 &mn, const float8 &mx);
|
||||
ccl_device_inline float8 fabs(const float8 &a);
|
||||
ccl_device_inline float8 mix(const float8 &a, const float8 &b, float t);
|
||||
ccl_device_inline float8_t rcp(const float8_t a);
|
||||
ccl_device_inline float8_t sqrt(const float8_t a);
|
||||
ccl_device_inline float8_t sqr(const float8_t a);
|
||||
ccl_device_inline bool is_zero(const float8_t a);
|
||||
ccl_device_inline float average(const float8_t a);
|
||||
ccl_device_inline float8_t min(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t max(const float8_t a, const float8_t b);
|
||||
ccl_device_inline float8_t clamp(const float8_t a, const float8_t mn, const float8_t mx);
|
||||
ccl_device_inline float8_t fabs(const float8_t a);
|
||||
ccl_device_inline float8_t mix(const float8_t a, const float8_t b, float t);
|
||||
ccl_device_inline float8_t saturate(const float8_t a);
|
||||
|
||||
ccl_device_inline float8 safe_divide(const float8 a, const float b);
|
||||
ccl_device_inline float8 safe_divide(const float8 a, const float8 b);
|
||||
ccl_device_inline float8_t safe_divide(const float8_t a, const float b);
|
||||
ccl_device_inline float8_t safe_divide(const float8_t a, const float8_t b);
|
||||
|
||||
ccl_device_inline float reduce_min(const float8 &a);
|
||||
ccl_device_inline float reduce_max(const float8 &a);
|
||||
ccl_device_inline float reduce_add(const float8 &a);
|
||||
ccl_device_inline float reduce_min(const float8_t a);
|
||||
ccl_device_inline float reduce_max(const float8_t a);
|
||||
ccl_device_inline float reduce_add(const float8_t a);
|
||||
|
||||
ccl_device_inline float8 saturate(const float8 &a);
|
||||
ccl_device_inline bool isequal(const float8 a, const float8 b);
|
||||
ccl_device_inline bool isequal(const float8_t a, const float8_t b);
|
||||
|
||||
/*******************************************************************************
|
||||
* Definition.
|
||||
*/
|
||||
|
||||
ccl_device_inline float8 zero_float8()
|
||||
ccl_device_inline float8_t zero_float8_t()
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_setzero_ps());
|
||||
return float8_t(_mm256_setzero_ps());
|
||||
#else
|
||||
return make_float8(0.0f);
|
||||
return make_float8_t(0.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 one_float8()
|
||||
ccl_device_inline float8_t one_float8_t()
|
||||
{
|
||||
return make_float8(1.0f);
|
||||
return make_float8_t(1.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator+(const float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t operator+(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_add_ps(a.m256, b.m256));
|
||||
return float8_t(_mm256_add_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8(
|
||||
return make_float8_t(
|
||||
a.a + b.a, a.b + b.b, a.c + b.c, a.d + b.d, a.e + b.e, a.f + b.f, a.g + b.g, a.h + b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator+(const float8 &a, const float f)
|
||||
ccl_device_inline float8_t operator+(const float8_t a, const float f)
|
||||
{
|
||||
return a + make_float8(f);
|
||||
return a + make_float8_t(f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator+(const float f, const float8 &a)
|
||||
ccl_device_inline float8_t operator+(const float f, const float8_t a)
|
||||
{
|
||||
return make_float8(f) + a;
|
||||
return make_float8_t(f) + a;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator-(const float8 &a)
|
||||
ccl_device_inline float8_t operator-(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
__m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
|
||||
return float8(_mm256_xor_ps(a.m256, mask));
|
||||
return float8_t(_mm256_xor_ps(a.m256, mask));
|
||||
#else
|
||||
return make_float8(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h);
|
||||
return make_float8_t(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator-(const float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t operator-(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_sub_ps(a.m256, b.m256));
|
||||
return float8_t(_mm256_sub_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8(
|
||||
return make_float8_t(
|
||||
a.a - b.a, a.b - b.b, a.c - b.c, a.d - b.d, a.e - b.e, a.f - b.f, a.g - b.g, a.h - b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator-(const float8 &a, const float f)
|
||||
ccl_device_inline float8_t operator-(const float8_t a, const float f)
|
||||
{
|
||||
return a - make_float8(f);
|
||||
return a - make_float8_t(f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator-(const float f, const float8 &a)
|
||||
ccl_device_inline float8_t operator-(const float f, const float8_t a)
|
||||
{
|
||||
return make_float8(f) - a;
|
||||
return make_float8_t(f) - a;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator*(const float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t operator*(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_mul_ps(a.m256, b.m256));
|
||||
return float8_t(_mm256_mul_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8(
|
||||
return make_float8_t(
|
||||
a.a * b.a, a.b * b.b, a.c * b.c, a.d * b.d, a.e * b.e, a.f * b.f, a.g * b.g, a.h * b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator*(const float8 &a, const float f)
|
||||
ccl_device_inline float8_t operator*(const float8_t a, const float f)
|
||||
{
|
||||
return a * make_float8(f);
|
||||
return a * make_float8_t(f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator*(const float f, const float8 &a)
|
||||
ccl_device_inline float8_t operator*(const float f, const float8_t a)
|
||||
{
|
||||
return make_float8(f) * a;
|
||||
return make_float8_t(f) * a;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator/(const float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t operator/(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_div_ps(a.m256, b.m256));
|
||||
return float8_t(_mm256_div_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8(
|
||||
return make_float8_t(
|
||||
a.a / b.a, a.b / b.b, a.c / b.c, a.d / b.d, a.e / b.e, a.f / b.f, a.g / b.g, a.h / b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator/(const float8 &a, const float f)
|
||||
ccl_device_inline float8_t operator/(const float8_t a, const float f)
|
||||
{
|
||||
return a / make_float8(f);
|
||||
return a / make_float8_t(f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator/(const float f, const float8 &a)
|
||||
ccl_device_inline float8_t operator/(const float f, const float8_t a)
|
||||
{
|
||||
return make_float8(f) / a;
|
||||
return make_float8_t(f) / a;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator+=(float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t operator+=(float8_t a, const float8_t b)
|
||||
{
|
||||
return a = a + b;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator-=(float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t operator-=(float8_t a, const float8_t b)
|
||||
{
|
||||
return a = a - b;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator*=(float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t operator*=(float8_t a, const float8_t b)
|
||||
{
|
||||
return a = a * b;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator*=(float8 &a, float f)
|
||||
ccl_device_inline float8_t operator*=(float8_t a, float f)
|
||||
{
|
||||
return a = a * f;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 operator/=(float8 &a, float f)
|
||||
ccl_device_inline float8_t operator/=(float8_t a, float f)
|
||||
{
|
||||
return a = a / f;
|
||||
}
|
||||
|
||||
ccl_device_inline bool operator==(const float8 &a, const float8 &b)
|
||||
ccl_device_inline bool operator==(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return (_mm256_movemask_ps(_mm256_castsi256_ps(
|
||||
@@ -206,160 +206,195 @@ ccl_device_inline bool operator==(const float8 &a, const float8 &b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 rcp(const float8 &a)
|
||||
ccl_device_inline float8_t rcp(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_rcp_ps(a.m256));
|
||||
return float8_t(_mm256_rcp_ps(a.m256));
|
||||
#else
|
||||
return make_float8(1.0f / a.a,
|
||||
1.0f / a.b,
|
||||
1.0f / a.c,
|
||||
1.0f / a.d,
|
||||
1.0f / a.e,
|
||||
1.0f / a.f,
|
||||
1.0f / a.g,
|
||||
1.0f / a.h);
|
||||
return make_float8_t(1.0f / a.a,
|
||||
1.0f / a.b,
|
||||
1.0f / a.c,
|
||||
1.0f / a.d,
|
||||
1.0f / a.e,
|
||||
1.0f / a.f,
|
||||
1.0f / a.g,
|
||||
1.0f / a.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 sqrt(const float8 &a)
|
||||
ccl_device_inline float8_t sqrt(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_sqrt_ps(a.m256));
|
||||
return float8_t(_mm256_sqrt_ps(a.m256));
|
||||
#else
|
||||
return make_float8(sqrtf(a.a),
|
||||
sqrtf(a.b),
|
||||
sqrtf(a.c),
|
||||
sqrtf(a.d),
|
||||
sqrtf(a.e),
|
||||
sqrtf(a.f),
|
||||
sqrtf(a.g),
|
||||
sqrtf(a.h));
|
||||
return make_float8_t(sqrtf(a.a),
|
||||
sqrtf(a.b),
|
||||
sqrtf(a.c),
|
||||
sqrtf(a.d),
|
||||
sqrtf(a.e),
|
||||
sqrtf(a.f),
|
||||
sqrtf(a.g),
|
||||
sqrtf(a.h));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 sqr(const float8 &a)
|
||||
ccl_device_inline float8_t sqr(const float8_t a)
|
||||
{
|
||||
return a * a;
|
||||
}
|
||||
|
||||
ccl_device_inline bool is_zero(const float8 &a)
|
||||
ccl_device_inline bool is_zero(const float8_t a)
|
||||
{
|
||||
return a == make_float8(0.0f);
|
||||
return a == make_float8_t(0.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float average(const float8 &a)
|
||||
ccl_device_inline float average(const float8_t a)
|
||||
{
|
||||
return reduce_add(a) / 8.0f;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 min(const float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t min(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_min_ps(a.m256, b.m256));
|
||||
return float8_t(_mm256_min_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8(min(a.a, b.a),
|
||||
min(a.b, b.b),
|
||||
min(a.c, b.c),
|
||||
min(a.d, b.d),
|
||||
min(a.e, b.e),
|
||||
min(a.f, b.f),
|
||||
min(a.g, b.g),
|
||||
min(a.h, b.h));
|
||||
return make_float8_t(min(a.a, b.a),
|
||||
min(a.b, b.b),
|
||||
min(a.c, b.c),
|
||||
min(a.d, b.d),
|
||||
min(a.e, b.e),
|
||||
min(a.f, b.f),
|
||||
min(a.g, b.g),
|
||||
min(a.h, b.h));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 max(const float8 &a, const float8 &b)
|
||||
ccl_device_inline float8_t max(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_max_ps(a.m256, b.m256));
|
||||
return float8_t(_mm256_max_ps(a.m256, b.m256));
|
||||
#else
|
||||
return make_float8(max(a.a, b.a),
|
||||
max(a.b, b.b),
|
||||
max(a.c, b.c),
|
||||
max(a.d, b.d),
|
||||
max(a.e, b.e),
|
||||
max(a.f, b.f),
|
||||
max(a.g, b.g),
|
||||
max(a.h, b.h));
|
||||
return make_float8_t(max(a.a, b.a),
|
||||
max(a.b, b.b),
|
||||
max(a.c, b.c),
|
||||
max(a.d, b.d),
|
||||
max(a.e, b.e),
|
||||
max(a.f, b.f),
|
||||
max(a.g, b.g),
|
||||
max(a.h, b.h));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 clamp(const float8 &a, const float8 &mn, const float8 &mx)
|
||||
ccl_device_inline float8_t clamp(const float8_t a, const float8_t mn, const float8_t mx)
|
||||
{
|
||||
return min(max(a, mn), mx);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 fabs(const float8 &a)
|
||||
ccl_device_inline float8_t fabs(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
return float8(_mm256_and_ps(a.m256, _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))));
|
||||
return float8_t(_mm256_and_ps(a.m256, _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))));
|
||||
#else
|
||||
return make_float8(fabsf(a.a),
|
||||
fabsf(a.b),
|
||||
fabsf(a.c),
|
||||
fabsf(a.d),
|
||||
fabsf(a.e),
|
||||
fabsf(a.f),
|
||||
fabsf(a.g),
|
||||
fabsf(a.h));
|
||||
return make_float8_t(fabsf(a.a),
|
||||
fabsf(a.b),
|
||||
fabsf(a.c),
|
||||
fabsf(a.d),
|
||||
fabsf(a.e),
|
||||
fabsf(a.f),
|
||||
fabsf(a.g),
|
||||
fabsf(a.h));
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 mix(const float8 &a, const float8 &b, float t)
|
||||
ccl_device_inline float8_t mix(const float8_t a, const float8_t b, float t)
|
||||
{
|
||||
return a + t * (b - a);
|
||||
}
|
||||
|
||||
ccl_device_inline float reduce_min(const float8 &a)
|
||||
ccl_device_inline float8_t saturate(const float8_t a)
|
||||
{
|
||||
return clamp(a, make_float8_t(0.0f), make_float8_t(1.0f));
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t exp(float8_t v)
|
||||
{
|
||||
return make_float8_t(
|
||||
expf(v.a), expf(v.b), expf(v.c), expf(v.d), expf(v.e), expf(v.f), expf(v.g), expf(v.h));
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t log(float8_t v)
|
||||
{
|
||||
return make_float8_t(
|
||||
logf(v.a), logf(v.b), logf(v.c), logf(v.d), logf(v.e), logf(v.f), logf(v.g), logf(v.h));
|
||||
}
|
||||
|
||||
ccl_device_inline float dot(const float8_t a, const float8_t b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8_t t(_mm256_dp_ps(a.m256, b.m256, 0xFF));
|
||||
return t[0] + t[4];
|
||||
#else
|
||||
return (a.a * b.a) + (a.b * b.b) + (a.c * b.c) + (a.d * b.d) + (a.e * b.e) + (a.f * b.f) +
|
||||
(a.g * b.g) + (a.h * b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8_t pow(float8_t v, float e)
|
||||
{
|
||||
return make_float8_t(powf(v.a, e),
|
||||
powf(v.b, e),
|
||||
powf(v.c, e),
|
||||
powf(v.d, e),
|
||||
powf(v.e, e),
|
||||
powf(v.f, e),
|
||||
powf(v.g, e),
|
||||
powf(v.h, e));
|
||||
}
|
||||
|
||||
ccl_device_inline float reduce_min(const float8_t a)
|
||||
{
|
||||
return min(min(min(a.a, a.b), min(a.c, a.d)), min(min(a.e, a.f), min(a.g, a.h)));
|
||||
}
|
||||
|
||||
ccl_device_inline float reduce_max(const float8 &a)
|
||||
ccl_device_inline float reduce_max(const float8_t a)
|
||||
{
|
||||
return max(max(max(a.a, a.b), max(a.c, a.d)), max(max(a.e, a.f), max(a.g, a.h)));
|
||||
}
|
||||
|
||||
ccl_device_inline float reduce_add(const float8 &a)
|
||||
ccl_device_inline float reduce_add(const float8_t a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8 b(_mm256_hadd_ps(a.m256, a.m256));
|
||||
float8 h(_mm256_hadd_ps(b.m256, b.m256));
|
||||
float8_t b(_mm256_hadd_ps(a.m256, a.m256));
|
||||
float8_t h(_mm256_hadd_ps(b.m256, b.m256));
|
||||
return h[0] + h[4];
|
||||
#else
|
||||
return a.a + a.b + a.c + a.d + a.e + a.f + a.g + a.h;
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline float8 saturate(const float8 &a)
|
||||
{
|
||||
return clamp(a, make_float8(0.0f), make_float8(1.0f));
|
||||
}
|
||||
|
||||
ccl_device_inline bool isequal(const float8 a, const float8 b)
|
||||
ccl_device_inline bool isequal(const float8_t a, const float8_t b)
|
||||
{
|
||||
return a == b;
|
||||
}
|
||||
|
||||
ccl_device_inline float8 safe_divide(const float8 a, const float b)
|
||||
ccl_device_inline float8_t safe_divide(const float8_t a, const float b)
|
||||
{
|
||||
return (b != 0.0f) ? a / b : make_float8(0.0f);
|
||||
return (b != 0.0f) ? a / b : make_float8_t(0.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 safe_divide(const float8 a, const float8 b)
|
||||
ccl_device_inline float8_t safe_divide(const float8_t a, const float8_t b)
|
||||
{
|
||||
return make_float8((b.a != 0.0f) ? a.a / b.a : 0.0f,
|
||||
(b.b != 0.0f) ? a.b / b.b : 0.0f,
|
||||
(b.c != 0.0f) ? a.c / b.c : 0.0f,
|
||||
(b.d != 0.0f) ? a.d / b.d : 0.0f,
|
||||
(b.e != 0.0f) ? a.e / b.e : 0.0f,
|
||||
(b.f != 0.0f) ? a.f / b.f : 0.0f,
|
||||
(b.g != 0.0f) ? a.g / b.g : 0.0f,
|
||||
(b.h != 0.0f) ? a.h / b.h : 0.0f);
|
||||
return make_float8_t((b.a != 0.0f) ? a.a / b.a : 0.0f,
|
||||
(b.b != 0.0f) ? a.b / b.b : 0.0f,
|
||||
(b.c != 0.0f) ? a.c / b.c : 0.0f,
|
||||
(b.d != 0.0f) ? a.d / b.d : 0.0f,
|
||||
(b.e != 0.0f) ? a.e / b.e : 0.0f,
|
||||
(b.f != 0.0f) ? a.f / b.f : 0.0f,
|
||||
(b.g != 0.0f) ? a.g / b.g : 0.0f,
|
||||
(b.h != 0.0f) ? a.h / b.h : 0.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 ensure_finite(float8 v)
|
||||
ccl_device_inline float8_t ensure_finite(float8_t v)
|
||||
{
|
||||
v.a = ensure_finite(v.a);
|
||||
v.b = ensure_finite(v.b);
|
||||
@@ -373,47 +408,12 @@ ccl_device_inline float8 ensure_finite(float8 v)
|
||||
return v;
|
||||
}
|
||||
|
||||
ccl_device_inline bool isfinite_safe(float8 v)
|
||||
ccl_device_inline bool isfinite_safe(float8_t v)
|
||||
{
|
||||
return isfinite_safe(v.a) && isfinite_safe(v.b) && isfinite_safe(v.c) && isfinite_safe(v.d) &&
|
||||
isfinite_safe(v.e) && isfinite_safe(v.f) && isfinite_safe(v.g) && isfinite_safe(v.h);
|
||||
}
|
||||
|
||||
ccl_device_inline float8 pow(float8 v, float e)
|
||||
{
|
||||
return make_float8(powf(v.a, e),
|
||||
powf(v.b, e),
|
||||
powf(v.c, e),
|
||||
powf(v.d, e),
|
||||
powf(v.e, e),
|
||||
powf(v.f, e),
|
||||
powf(v.g, e),
|
||||
powf(v.h, e));
|
||||
}
|
||||
|
||||
ccl_device_inline float8 exp(float8 v)
|
||||
{
|
||||
return make_float8(
|
||||
expf(v.a), expf(v.b), expf(v.c), expf(v.d), expf(v.e), expf(v.f), expf(v.g), expf(v.h));
|
||||
}
|
||||
|
||||
ccl_device_inline float8 log(float8 v)
|
||||
{
|
||||
return make_float8(
|
||||
logf(v.a), logf(v.b), logf(v.c), logf(v.d), logf(v.e), logf(v.f), logf(v.g), logf(v.h));
|
||||
}
|
||||
|
||||
ccl_device_inline float dot(const float8 &a, const float8 &b)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8 t(_mm256_dp_ps(a.m256, b.m256, 0xFF));
|
||||
return t[0] + t[4];
|
||||
#else
|
||||
return (a.a * b.a) + (a.b * b.b) + (a.c * b.c) + (a.d * b.d) + (a.e * b.e) + (a.f * b.f) +
|
||||
(a.g * b.g) + (a.h * b.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
#endif /* __UTIL_MATH_FLOAT8_H__ */
|
||||
|
||||
@@ -11,10 +11,13 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* float8 is a reserved type in Metal that has not been implemented. For
|
||||
* that reason this is named float8_t. */
|
||||
|
||||
#ifdef __KERNEL_GPU__
|
||||
struct float8
|
||||
struct float8_t
|
||||
#else
|
||||
struct ccl_try_align(32) float8
|
||||
struct ccl_try_align(32) float8_t
|
||||
#endif
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
@@ -25,14 +28,14 @@ struct ccl_try_align(32) float8
|
||||
};
|
||||
};
|
||||
|
||||
__forceinline float8();
|
||||
__forceinline float8(const float8 &a);
|
||||
__forceinline explicit float8(const __m256 &a);
|
||||
__forceinline float8_t();
|
||||
__forceinline float8_t(const float8_t &a);
|
||||
__forceinline explicit float8_t(const __m256 &a);
|
||||
|
||||
__forceinline operator const __m256 &() const;
|
||||
__forceinline operator __m256 &();
|
||||
|
||||
__forceinline float8 &operator=(const float8 &a);
|
||||
__forceinline float8_t &operator=(const float8_t &a);
|
||||
|
||||
#else /* __KERNEL_AVX2__ */
|
||||
float a, b, c, d, e, f, g, h;
|
||||
@@ -44,9 +47,9 @@ struct ccl_try_align(32) float8
|
||||
#endif
|
||||
};
|
||||
|
||||
ccl_device_inline float8 make_float8(float f);
|
||||
ccl_device_inline float8
|
||||
make_float8(float a, float b, float c, float d, float e, float f, float g, float h);
|
||||
ccl_device_inline float8_t make_float8_t(float f);
|
||||
ccl_device_inline float8_t
|
||||
make_float8_t(float a, float b, float c, float d, float e, float f, float g, float h);
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
|
||||
@@ -16,29 +16,29 @@
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __KERNEL_AVX2__
|
||||
__forceinline float8::float8()
|
||||
__forceinline float8_t::float8_t()
|
||||
{
|
||||
}
|
||||
|
||||
__forceinline float8::float8(const float8 &f) : m256(f.m256)
|
||||
__forceinline float8_t::float8_t(const float8_t &f) : m256(f.m256)
|
||||
{
|
||||
}
|
||||
|
||||
__forceinline float8::float8(const __m256 &f) : m256(f)
|
||||
__forceinline float8_t::float8_t(const __m256 &f) : m256(f)
|
||||
{
|
||||
}
|
||||
|
||||
__forceinline float8::operator const __m256 &() const
|
||||
__forceinline float8_t::operator const __m256 &() const
|
||||
{
|
||||
return m256;
|
||||
}
|
||||
|
||||
__forceinline float8::operator __m256 &()
|
||||
__forceinline float8_t::operator __m256 &()
|
||||
{
|
||||
return m256;
|
||||
}
|
||||
|
||||
__forceinline float8 &float8::operator=(const float8 &f)
|
||||
__forceinline float8_t &float8_t::operator=(const float8_t &f)
|
||||
{
|
||||
m256 = f.m256;
|
||||
return *this;
|
||||
@@ -46,14 +46,14 @@ __forceinline float8 &float8::operator=(const float8 &f)
|
||||
#endif /* __KERNEL_AVX2__ */
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
__forceinline float float8::operator[](int i) const
|
||||
__forceinline float float8_t::operator[](int i) const
|
||||
{
|
||||
util_assert(i >= 0);
|
||||
util_assert(i < 8);
|
||||
return *(&a + i);
|
||||
}
|
||||
|
||||
__forceinline float &float8::operator[](int i)
|
||||
__forceinline float &float8_t::operator[](int i)
|
||||
{
|
||||
util_assert(i >= 0);
|
||||
util_assert(i < 8);
|
||||
@@ -61,23 +61,23 @@ __forceinline float &float8::operator[](int i)
|
||||
}
|
||||
#endif
|
||||
|
||||
ccl_device_inline float8 make_float8(float f)
|
||||
ccl_device_inline float8_t make_float8_t(float f)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8 r(_mm256_set1_ps(f));
|
||||
float8_t r(_mm256_set1_ps(f));
|
||||
#else
|
||||
float8 r = {f, f, f, f, f, f, f, f};
|
||||
float8_t r = {f, f, f, f, f, f, f, f};
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
|
||||
ccl_device_inline float8
|
||||
make_float8(float a, float b, float c, float d, float e, float f, float g, float h)
|
||||
ccl_device_inline float8_t
|
||||
make_float8_t(float a, float b, float c, float d, float e, float f, float g, float h)
|
||||
{
|
||||
#ifdef __KERNEL_AVX2__
|
||||
float8 r(_mm256_setr_ps(a, b, c, d, e, f, g, h));
|
||||
float8_t r(_mm256_setr_ps(a, b, c, d, e, f, g, h));
|
||||
#else
|
||||
float8 r = {a, b, c, d, e, f, g, h};
|
||||
float8_t r = {a, b, c, d, e, f, g, h};
|
||||
#endif
|
||||
return r;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user