Files
test2/intern/cycles/kernel/device/cpu/bvh.h
Patrick Mours 6487395fa5 Cycles: Add linear curve shape
Add new "Linear 3D Curves" option in the Curves panel in the render
properties. This renders curves as linear segments rather than smooth
curves, for faster render time at the cost of accuracy.

On NVIDIA Blackwell GPUs, this can give a 6x speedup compared to smooth
curves, due to hardware acceleration. On NVIDIA Ada there is still
a 3x speedup, and CPU and other GPU backends will also render this
faster.

A difference with smooth curves is that these have end caps, as this
was simpler to implement and they are usually helpful anyway.

In the future this functionality will also be used to properly support
the CURVE_TYPE_POLY on the new curves object.

Pull Request: https://projects.blender.org/blender/blender/pulls/139735
2025-07-29 17:05:01 +02:00

777 lines
26 KiB
C

/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
/* CPU Embree implementation of ray-scene intersection. */
#pragma once
#include <embree4/rtcore_geometry.h>
#include <embree4/rtcore_ray.h>
#include <embree4/rtcore_scene.h>
#ifdef __KERNEL_ONEAPI__
# include "kernel/device/oneapi/compat.h"
# include "kernel/device/oneapi/globals.h"
#else
# include "kernel/device/cpu/compat.h"
# include "kernel/device/cpu/globals.h"
#endif
#include "kernel/bvh/types.h"
#include "kernel/bvh/util.h"
#include "kernel/geom/object.h"
#include "kernel/integrator/state.h"
#include "kernel/integrator/state_util.h"
#include "kernel/sample/lcg.h"
CCL_NAMESPACE_BEGIN
#ifdef __KERNEL_ONEAPI__
using numhit_t = uint16_t;
#else
using numhit_t = uint32_t;
#endif
/* Before Embree 4.4, the so-called Traversable functionality was exposed through Scene API.
* So, in order to simplify code between different versions, we are defining the traversable class
* and calls for older Embree versions as well. */
#if RTC_VERSION < 40400
# define RTCTraversable RTCScene
# define rtcGetGeometryUserDataFromTraversable rtcGetGeometryUserDataFromScene
# define rtcTraversableIntersect1 rtcIntersect1
# define rtcTraversableOccluded1 rtcOccluded1
#endif
#ifdef __KERNEL_ONEAPI__
# define CYCLES_EMBREE_USED_FEATURES \
(kernel_handler.get_specialization_constant<oneapi_embree_features>())
#else
# define CYCLES_EMBREE_USED_FEATURES \
(RTCFeatureFlags)(RTC_FEATURE_FLAG_TRIANGLE | RTC_FEATURE_FLAG_INSTANCE | \
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS | RTC_FEATURE_FLAG_POINT | \
RTC_FEATURE_FLAG_MOTION_BLUR | RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE | \
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE)
#endif
#define EMBREE_IS_HAIR(x) (x & 1)
/* Intersection context. */
struct CCLFirstHitContext : public RTCRayQueryContext {
KernelGlobals kg;
/* For avoiding self intersections */
const Ray *ray;
};
struct CCLShadowContext : public RTCRayQueryContext {
KernelGlobals kg;
const Ray *ray;
IntegratorShadowState isect_s;
float throughput;
float max_t;
bool opaque_hit;
numhit_t max_transparent_hits;
numhit_t num_transparent_hits;
numhit_t num_recorded_hits;
};
struct CCLLocalContext : public RTCRayQueryContext {
KernelGlobals kg;
const Ray *ray;
numhit_t max_hits;
int local_object_id;
LocalIntersection *local_isect;
uint *lcg_state;
bool is_sss;
};
struct CCLVolumeContext : public RTCRayQueryContext {
KernelGlobals kg;
const Ray *ray;
#ifdef __VOLUME_RECORD_ALL__
numhit_t max_hits;
#endif
numhit_t num_hits;
Intersection *vol_isect;
};
/* Utilities. */
ccl_device_inline void kernel_embree_setup_ray(const Ray &ray,
RTCRay &rtc_ray,
const uint visibility)
{
rtc_ray.org_x = ray.P.x;
rtc_ray.org_y = ray.P.y;
rtc_ray.org_z = ray.P.z;
rtc_ray.dir_x = ray.D.x;
rtc_ray.dir_y = ray.D.y;
rtc_ray.dir_z = ray.D.z;
rtc_ray.tnear = ray.tmin;
rtc_ray.tfar = ray.tmax;
rtc_ray.time = ray.time;
rtc_ray.mask = visibility;
}
ccl_device_inline void kernel_embree_setup_rayhit(const Ray &ray,
RTCRayHit &rayhit,
const uint visibility)
{
kernel_embree_setup_ray(ray, rayhit.ray, visibility);
rayhit.hit.geomID = RTC_INVALID_GEOMETRY_ID;
rayhit.hit.instID[0] = RTC_INVALID_GEOMETRY_ID;
}
ccl_device_inline int kernel_embree_get_hit_object(const RTCHit *hit)
{
return (hit->instID[0] != RTC_INVALID_GEOMETRY_ID ? hit->instID[0] : hit->geomID) / 2;
}
ccl_device_inline bool kernel_embree_is_self_intersection(const KernelGlobals kg,
const RTCHit *hit,
const Ray *ray,
const intptr_t prim_offset)
{
const int object = kernel_embree_get_hit_object(hit);
int prim;
if ((ray->self.object == object) || (ray->self.light_object == object)) {
prim = hit->primID + prim_offset;
}
else {
return false;
}
const bool is_hair = hit->geomID & 1;
if (is_hair) {
prim = kernel_data_fetch(curve_segments, prim).prim;
}
return intersection_skip_self_shadow(ray->self, object, prim);
}
ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
const RTCRay *ray,
const RTCHit *hit,
Intersection *isect,
const intptr_t prim_offset)
{
isect->t = ray->tfar;
isect->prim = hit->primID + prim_offset;
isect->object = kernel_embree_get_hit_object(hit);
const bool is_hair = hit->geomID & 1;
if (is_hair) {
const KernelCurveSegment segment = kernel_data_fetch(curve_segments, isect->prim);
isect->type = segment.type;
isect->prim = segment.prim;
isect->u = hit->u;
isect->v = hit->v;
}
else {
isect->type = kernel_data_fetch(objects, isect->object).primitive_type;
isect->u = hit->u;
isect->v = hit->v;
}
}
ccl_device_inline void kernel_embree_convert_hit(KernelGlobals kg,
const RTCRay *ray,
const RTCHit *hit,
Intersection *isect)
{
intptr_t prim_offset;
if (hit->instID[0] != RTC_INVALID_GEOMETRY_ID) {
RTCTraversable inst_scene = (RTCTraversable)rtcGetGeometryUserDataFromTraversable(
kernel_data.device_bvh, hit->instID[0]);
prim_offset = intptr_t(rtcGetGeometryUserDataFromTraversable(inst_scene, hit->geomID));
}
else {
prim_offset = intptr_t(
rtcGetGeometryUserDataFromTraversable(kernel_data.device_bvh, hit->geomID));
}
kernel_embree_convert_hit(kg, ray, hit, isect, prim_offset);
}
ccl_device_inline void kernel_embree_convert_sss_hit(KernelGlobals kg,
const RTCRay *ray,
const RTCHit *hit,
Intersection *isect,
const int object,
const intptr_t prim_offset)
{
isect->u = hit->u;
isect->v = hit->v;
isect->t = ray->tfar;
isect->prim = hit->primID + prim_offset;
isect->object = object;
isect->type = kernel_data_fetch(objects, object).primitive_type;
}
/* Ray filter functions. */
/* This gets called by Embree at every valid ray/object intersection.
* Things like recording subsurface or shadow hits for later evaluation
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls. */
ccl_device_forceinline void kernel_embree_filter_intersection_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
RTCHit *hit = (RTCHit *)args->hit;
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const ThreadKernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
if (kernel_embree_is_self_intersection(
kg, hit, cray, reinterpret_cast<intptr_t>(args->geometryUserPtr)))
{
*args->valid = 0;
return;
}
#ifdef __SHADOW_LINKING__
if (intersection_skip_shadow_link(kg, cray->self, kernel_embree_get_hit_object(hit))) {
*args->valid = 0;
return;
}
#endif
}
/* This gets called by Embree at every valid ray/object intersection.
* Things like recording subsurface or shadow hits for later evaluation
* as well as filtering for volume objects happen here.
* Cycles' own BVH does that directly inside the traversal calls.
*/
ccl_device_forceinline void kernel_embree_filter_occluded_shadow_all_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
const RTCRay *ray = (RTCRay *)args->ray;
RTCHit *hit = (RTCHit *)args->hit;
CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const ThreadKernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
Intersection current_isect;
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
if (intersection_skip_self_shadow(cray->self, current_isect.object, current_isect.prim)) {
*args->valid = 0;
return;
}
#ifdef __SHADOW_LINKING__
if (intersection_skip_shadow_link(kg, cray->self, current_isect.object)) {
*args->valid = 0;
return;
}
#endif
/* If no transparent shadows, all light is blocked. */
const int flags = intersection_get_shader_flags(kg, current_isect.prim, current_isect.type);
if ((flags & SD_HAS_TRANSPARENT_SHADOW) == 0) {
ctx->opaque_hit = true;
return;
}
if (intersection_skip_shadow_already_recoded(
ctx->isect_s, current_isect.object, current_isect.prim, ctx->num_recorded_hits))
{
*args->valid = 0;
return;
}
/* Only count transparent bounces, volume bounds bounces are counted during shading. */
ctx->num_transparent_hits += !(flags & SD_HAS_ONLY_VOLUME);
if (ctx->num_transparent_hits > ctx->max_transparent_hits) {
/* Max number of hits exceeded. */
ctx->opaque_hit = true;
return;
}
/* Always use baked shadow transparency for curves. */
if (current_isect.type & PRIMITIVE_CURVE) {
ctx->throughput *= intersection_curve_shadow_transparency(
kg, current_isect.object, current_isect.prim, current_isect.type, current_isect.u);
if (ctx->throughput < CURVE_SHADOW_TRANSPARENCY_CUTOFF) {
ctx->opaque_hit = true;
return;
}
else {
*args->valid = 0;
return;
}
}
numhit_t isect_index = ctx->num_recorded_hits;
/* Always increase the number of recorded hits, even beyond the maximum,
* so that we can detect this and trace another ray if needed.
* More details about the related logic can be found in implementation of
* "shadow_intersections_has_remaining" and "integrate_transparent_shadow"
* functions. */
++ctx->num_recorded_hits;
/* This tells Embree to continue tracing. */
*args->valid = 0;
const numhit_t max_record_hits = numhit_t(INTEGRATOR_SHADOW_ISECT_SIZE);
/* If the maximum number of hits was reached, replace the furthest intersection
* with a closer one so we get the N closest intersections. */
if (isect_index >= max_record_hits) {
/* When recording only N closest hits, max_t will always only decrease.
* So let's test if we are already not meeting criteria and can skip max_t recalculation. */
if (current_isect.t >= ctx->max_t) {
return;
}
float max_t = INTEGRATOR_STATE_ARRAY(ctx->isect_s, shadow_isect, 0, t);
numhit_t max_recorded_hit = numhit_t(0);
float second_largest_t = 0.0f;
for (numhit_t i = numhit_t(1); i < max_record_hits; ++i) {
const float isect_t = INTEGRATOR_STATE_ARRAY(ctx->isect_s, shadow_isect, i, t);
if (isect_t > max_t) {
second_largest_t = max_t;
max_recorded_hit = i;
max_t = isect_t;
}
else if (isect_t > second_largest_t) {
second_largest_t = isect_t;
}
}
if (isect_index == max_record_hits && current_isect.t >= max_t) {
/* `ctx->max_t` was initialized to `ray->tmax` before the index exceeds the limit. Now that
* we have looped through the array, we can properly clamp `ctx->max_t`. */
ctx->max_t = max_t;
return;
}
isect_index = max_recorded_hit;
/* After replacing `max_t` with `current_isect.t`, the new largest t would be either
* `current_isect.t` or the second largest t before. */
ctx->max_t = max(second_largest_t, current_isect.t);
}
integrator_state_write_shadow_isect(ctx->isect_s, &current_isect, isect_index);
}
ccl_device_forceinline void kernel_embree_filter_occluded_local_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
const RTCRay *ray = (RTCRay *)args->ray;
RTCHit *hit = (RTCHit *)args->hit;
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const ThreadKernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
/* Check if it's hitting the correct object. */
Intersection current_isect;
if (ctx->is_sss) {
kernel_embree_convert_sss_hit(kg,
ray,
hit,
&current_isect,
ctx->local_object_id,
reinterpret_cast<intptr_t>(args->geometryUserPtr));
}
else {
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
if (ctx->local_object_id != current_isect.object) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
}
if (intersection_skip_self_local(cray->self, current_isect.prim)) {
*args->valid = 0;
return;
}
/* No intersection information requested, just return a hit. */
if (ctx->max_hits == 0) {
return;
}
/* Ignore curves. */
if (EMBREE_IS_HAIR(hit->geomID)) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
LocalIntersection *local_isect = ctx->local_isect;
int hit_idx = 0;
if (ctx->lcg_state) {
/* See triangle_intersect_subsurface() for the native equivalent. */
for (int i = min((int)ctx->max_hits, local_isect->num_hits) - 1; i >= 0; --i) {
if (local_isect->hits[i].t == ray->tfar) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
}
local_isect->num_hits++;
if (local_isect->num_hits <= ctx->max_hits) {
hit_idx = local_isect->num_hits - 1;
}
else {
/* reservoir sampling: if we are at the maximum number of
* hits, randomly replace element or skip it */
hit_idx = lcg_step_uint(ctx->lcg_state) % local_isect->num_hits;
if (hit_idx >= ctx->max_hits) {
/* This tells Embree to continue tracing. */
*args->valid = 0;
return;
}
}
}
else {
/* Record closest intersection only. */
if (local_isect->num_hits && current_isect.t > local_isect->hits[0].t) {
*args->valid = 0;
return;
}
local_isect->num_hits = 1;
}
/* record intersection */
local_isect->hits[hit_idx] = current_isect;
local_isect->Ng[hit_idx] = normalize(make_float3(hit->Ng_x, hit->Ng_y, hit->Ng_z));
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
ccl_device_forceinline void kernel_embree_filter_occluded_volume_all_func_impl(
const RTCFilterFunctionNArguments *args)
{
/* Current implementation in Cycles assumes only single-ray intersection queries. */
assert(args->N == 1);
const RTCRay *ray = (RTCRay *)args->ray;
RTCHit *hit = (RTCHit *)args->hit;
CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
#ifdef __KERNEL_ONEAPI__
KernelGlobalsGPU *kg = nullptr;
#else
const ThreadKernelGlobalsCPU *kg = ctx->kg;
#endif
const Ray *cray = ctx->ray;
#ifdef __VOLUME_RECORD_ALL__
/* Append the intersection to the end of the array. */
if (ctx->num_hits < ctx->max_hits) {
#endif
Intersection current_isect;
kernel_embree_convert_hit(
kg, ray, hit, &current_isect, reinterpret_cast<intptr_t>(args->geometryUserPtr));
if (intersection_skip_self(cray->self, current_isect.object, current_isect.prim)) {
*args->valid = 0;
return;
}
Intersection *isect = &ctx->vol_isect[ctx->num_hits];
++ctx->num_hits;
*isect = current_isect;
/* Only primitives from volume object. */
uint tri_object = isect->object;
int object_flag = kernel_data_fetch(object_flag, tri_object);
if ((object_flag & SD_OBJECT_HAS_VOLUME) == 0) {
--ctx->num_hits;
#ifndef __VOLUME_RECORD_ALL__
/* Without __VOLUME_RECORD_ALL__ we need only a first counted hit, so we will
* continue tracing only if a current hit is not counted. */
*args->valid = 0;
#endif
}
#ifdef __VOLUME_RECORD_ALL__
/* This tells Embree to continue tracing. */
*args->valid = 0;
}
#endif
}
#ifdef __KERNEL_ONEAPI__
/* Static wrappers so we can call the callbacks from out side the ONEAPIKernelContext class */
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_intersection_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLFirstHitContext *ctx = (CCLFirstHitContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_intersection_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_shadow_all_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLShadowContext *ctx = (CCLShadowContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_shadow_all_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_local_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLLocalContext *ctx = (CCLLocalContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_local_func_impl(args);
}
RTC_SYCL_INDIRECTLY_CALLABLE static void ccl_always_inline
kernel_embree_filter_occluded_volume_all_func_static(const RTCFilterFunctionNArguments *args)
{
RTCHit *hit = (RTCHit *)args->hit;
CCLVolumeContext *ctx = (CCLVolumeContext *)(args->context);
ONEAPIKernelContext *context = static_cast<ONEAPIKernelContext *>(ctx->kg);
context->kernel_embree_filter_occluded_volume_all_func_impl(args);
}
# define kernel_embree_filter_intersection_func \
ONEAPIKernelContext::kernel_embree_filter_intersection_func_static
# define kernel_embree_filter_occluded_shadow_all_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_shadow_all_func_static
# define kernel_embree_filter_occluded_local_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_local_func_static
# define kernel_embree_filter_occluded_volume_all_func \
ONEAPIKernelContext::kernel_embree_filter_occluded_volume_all_func_static
#else
# define kernel_embree_filter_intersection_func kernel_embree_filter_intersection_func_impl
# define kernel_embree_filter_occluded_shadow_all_func \
kernel_embree_filter_occluded_shadow_all_func_impl
# define kernel_embree_filter_occluded_local_func kernel_embree_filter_occluded_local_func_impl
# define kernel_embree_filter_occluded_volume_all_func \
kernel_embree_filter_occluded_volume_all_func_impl
#endif
/* Scene intersection. */
ccl_device_intersect bool kernel_embree_intersect(KernelGlobals kg,
const ccl_private Ray *ray,
const uint visibility,
ccl_private Intersection *isect)
{
isect->t = ray->tmax;
CCLFirstHitContext ctx;
rtcInitRayQueryContext(&ctx);
#ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes nullptr to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
#else
ctx.kg = kg;
#endif
RTCRayHit ray_hit;
ctx.ray = ray;
kernel_embree_setup_rayhit(*ray, ray_hit, visibility);
RTCIntersectArguments args;
rtcInitIntersectArguments(&args);
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_intersection_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcTraversableIntersect1(kernel_data.device_bvh, &ray_hit, &args);
if (ray_hit.hit.geomID == RTC_INVALID_GEOMETRY_ID ||
ray_hit.hit.primID == RTC_INVALID_GEOMETRY_ID)
{
return false;
}
kernel_embree_convert_hit(kg, &ray_hit.ray, &ray_hit.hit, isect);
return true;
}
#ifdef __BVH_LOCAL__
ccl_device_intersect bool kernel_embree_intersect_local(KernelGlobals kg,
const ccl_private Ray *ray,
ccl_private LocalIntersection *local_isect,
const int local_object,
ccl_private uint *lcg_state,
const int max_hits)
{
const bool has_bvh = !(kernel_data_fetch(object_flag, local_object) &
SD_OBJECT_TRANSFORM_APPLIED);
CCLLocalContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes nullptr to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
ctx.is_sss = has_bvh;
ctx.lcg_state = lcg_state;
ctx.max_hits = max_hits;
ctx.ray = ray;
ctx.local_isect = local_isect;
if (local_isect) {
local_isect->num_hits = 0;
}
ctx.local_object_id = local_object;
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, PATH_RAY_ALL_VISIBILITY);
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = reinterpret_cast<RTCFilterFunctionN>(kernel_embree_filter_occluded_local_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
/* If this object has its own BVH, use it. */
if (has_bvh) {
float3 P = ray->P;
float3 dir = ray->D;
float3 idir = ray->D;
# ifdef __OBJECT_MOTION__
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
# else
bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
# endif
rtc_ray.org_x = P.x;
rtc_ray.org_y = P.y;
rtc_ray.org_z = P.z;
rtc_ray.dir_x = dir.x;
rtc_ray.dir_y = dir.y;
rtc_ray.dir_z = dir.z;
rtc_ray.tnear = ray->tmin;
rtc_ray.tfar = ray->tmax;
RTCTraversable scene = (RTCTraversable)rtcGetGeometryUserDataFromTraversable(
kernel_data.device_bvh, local_object * 2);
kernel_assert(scene);
if (scene) {
rtcTraversableOccluded1(scene, &rtc_ray, &args);
}
}
else {
rtcTraversableOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
}
/* rtcOccluded1 sets tfar to -inf if a hit was found. */
return (local_isect && local_isect->num_hits > 0) || (rtc_ray.tfar < 0);
}
#endif
#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool kernel_embree_intersect_shadow_all(KernelGlobals kg,
IntegratorShadowState state,
const ccl_private Ray *ray,
const uint visibility,
const uint max_transparent_hits,
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
CCLShadowContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko): Cycles GPU back-ends passes nullptr to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
ctx.num_transparent_hits = ctx.num_recorded_hits = numhit_t(0);
ctx.throughput = 1.0f;
ctx.opaque_hit = false;
ctx.isect_s = state;
ctx.max_transparent_hits = numhit_t(max_transparent_hits);
ctx.max_t = ray->tmax;
ctx.ray = ray;
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = reinterpret_cast<RTCFilterFunctionN>(
kernel_embree_filter_occluded_shadow_all_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcTraversableOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
*num_recorded_hits = ctx.num_recorded_hits;
*throughput = ctx.throughput;
return ctx.opaque_hit;
}
#endif
#ifdef __VOLUME__
ccl_device_intersect uint kernel_embree_intersect_volume(KernelGlobals kg,
const ccl_private Ray *ray,
ccl_private Intersection *isect,
# ifdef __VOLUME_RECORD_ALL__
const uint max_hits,
# endif
const uint visibility)
{
CCLVolumeContext ctx;
rtcInitRayQueryContext(&ctx);
# ifdef __KERNEL_ONEAPI__
/* NOTE(sirgienko) Cycles GPU back-ends passes nullptr to KernelGlobals and
* uses global device allocation (CUDA, Optix, HIP) or passes all needed data
* as a class context (Metal, oneAPI). So we need to pass this context here
* in order to have an access to it later in Embree filter functions on GPU. */
ctx.kg = (KernelGlobals)this;
# else
ctx.kg = kg;
# endif
ctx.vol_isect = isect;
# ifdef __VOLUME_RECORD_ALL__
ctx.max_hits = numhit_t(max_hits);
# endif
ctx.num_hits = numhit_t(0);
ctx.ray = ray;
RTCRay rtc_ray;
kernel_embree_setup_ray(*ray, rtc_ray, visibility);
RTCOccludedArguments args;
rtcInitOccludedArguments(&args);
args.filter = reinterpret_cast<RTCFilterFunctionN>(
kernel_embree_filter_occluded_volume_all_func);
args.feature_mask = CYCLES_EMBREE_USED_FEATURES;
args.context = &ctx;
rtcTraversableOccluded1(kernel_data.device_bvh, &rtc_ray, &args);
return ctx.num_hits;
}
#endif
CCL_NAMESPACE_END