Files
test/intern/cycles/kernel/geom/object.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

620 lines
19 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
/* Object Primitive
*
* All mesh and curve primitives are part of an object. The same mesh and curves
* may be instanced multiple times by different objects.
*
* If the mesh is not instanced multiple times, the object will not be explicitly
* stored as a primitive in the BVH, rather the bare triangles are curved are
* directly primitives in the BVH with world space locations applied, and the object
* ID is looked up afterwards. */
#pragma once
CCL_NAMESPACE_BEGIN
/* Object attributes, for now a fixed size and contents */
enum ObjectTransform {
OBJECT_TRANSFORM = 0,
OBJECT_INVERSE_TRANSFORM = 1,
};
enum ObjectVectorTransform { OBJECT_PASS_MOTION_PRE = 0, OBJECT_PASS_MOTION_POST = 1 };
/* Object to world space transformation */
ccl_device_inline Transform object_fetch_transform(KernelGlobals kg,
int object,
enum ObjectTransform type)
{
if (type == OBJECT_INVERSE_TRANSFORM) {
return kernel_data_fetch(objects, object).itfm;
}
else {
return kernel_data_fetch(objects, object).tfm;
}
}
/* Lamp to world space transformation */
ccl_device_inline Transform lamp_fetch_transform(KernelGlobals kg, int lamp, bool inverse)
{
if (inverse) {
return kernel_data_fetch(lights, lamp).itfm;
}
else {
return kernel_data_fetch(lights, lamp).tfm;
}
}
/* Object to world space transformation for motion vectors */
ccl_device_inline Transform object_fetch_motion_pass_transform(KernelGlobals kg,
int object,
enum ObjectVectorTransform type)
{
int offset = object * OBJECT_MOTION_PASS_SIZE + (int)type;
return kernel_data_fetch(object_motion_pass, offset);
}
/* Motion blurred object transformations */
#ifdef __OBJECT_MOTION__
ccl_device_inline Transform object_fetch_transform_motion(KernelGlobals kg, int object, float time)
{
const uint motion_offset = kernel_data_fetch(objects, object).motion_offset;
ccl_global const DecomposedTransform *motion = &kernel_data_fetch(object_motion, motion_offset);
const uint num_steps = kernel_data_fetch(objects, object).numsteps * 2 + 1;
Transform tfm;
transform_motion_array_interpolate(&tfm, motion, num_steps, time);
return tfm;
}
ccl_device_inline Transform object_fetch_transform_motion_test(KernelGlobals kg,
int object,
float time,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private Transform *itfm)
{
int object_flag = kernel_data_fetch(object_flag, object);
if (object_flag & SD_OBJECT_MOTION) {
/* if we do motion blur */
Transform tfm = object_fetch_transform_motion(kg, object, time);
if (itfm)
*itfm = transform_quick_inverse(tfm);
return tfm;
}
else {
Transform tfm = object_fetch_transform(kg, object, OBJECT_TRANSFORM);
if (itfm)
*itfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
return tfm;
}
}
#endif
/* Get transform matrix for shading point. */
ccl_device_inline Transform object_get_transform(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const ShaderData *sd)
{
#ifdef __OBJECT_MOTION__
return (sd->object_flag & SD_OBJECT_MOTION) ?
sd->ob_tfm_motion :
object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
#else
return object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
#endif
}
ccl_device_inline Transform object_get_inverse_transform(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const ShaderData *sd)
{
#ifdef __OBJECT_MOTION__
return (sd->object_flag & SD_OBJECT_MOTION) ?
sd->ob_itfm_motion :
object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
#else
return object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
#endif
}
/* Transform position from object to world space */
ccl_device_inline void object_position_transform(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const ShaderData *sd,
ccl_private float3 *P)
{
#ifdef __OBJECT_MOTION__
if (sd->object_flag & SD_OBJECT_MOTION) {
*P = transform_point_auto(&sd->ob_tfm_motion, *P);
return;
}
#endif
2017-02-16 06:24:13 -05:00
Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
*P = transform_point(&tfm, *P);
}
/* Transform position from world to object space */
ccl_device_inline void object_inverse_position_transform(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const ShaderData *sd,
ccl_private float3 *P)
{
#ifdef __OBJECT_MOTION__
if (sd->object_flag & SD_OBJECT_MOTION) {
*P = transform_point_auto(&sd->ob_itfm_motion, *P);
return;
}
#endif
2017-02-16 06:24:13 -05:00
Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
*P = transform_point(&tfm, *P);
}
/* Transform normal from world to object space */
ccl_device_inline void object_inverse_normal_transform(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const ShaderData *sd,
ccl_private float3 *N)
{
#ifdef __OBJECT_MOTION__
if (sd->object_flag & SD_OBJECT_MOTION) {
if ((sd->object != OBJECT_NONE) || (sd->type == PRIMITIVE_LAMP)) {
*N = normalize(transform_direction_transposed_auto(&sd->ob_tfm_motion, *N));
}
return;
}
#endif
2017-02-16 06:24:13 -05:00
if (sd->object != OBJECT_NONE) {
Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
*N = normalize(transform_direction_transposed(&tfm, *N));
}
else if (sd->type == PRIMITIVE_LAMP) {
Transform tfm = lamp_fetch_transform(kg, sd->lamp, false);
*N = normalize(transform_direction_transposed(&tfm, *N));
}
}
/* Transform normal from object to world space */
ccl_device_inline void object_normal_transform(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const ShaderData *sd,
ccl_private float3 *N)
{
#ifdef __OBJECT_MOTION__
if (sd->object_flag & SD_OBJECT_MOTION) {
*N = normalize(transform_direction_transposed_auto(&sd->ob_itfm_motion, *N));
return;
}
#endif
2017-02-16 06:24:13 -05:00
Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
*N = normalize(transform_direction_transposed(&tfm, *N));
}
/* Transform direction vector from object to world space */
ccl_device_inline void object_dir_transform(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const ShaderData *sd,
ccl_private float3 *D)
{
#ifdef __OBJECT_MOTION__
if (sd->object_flag & SD_OBJECT_MOTION) {
*D = transform_direction_auto(&sd->ob_tfm_motion, *D);
return;
}
#endif
2017-02-16 06:24:13 -05:00
Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
*D = transform_direction(&tfm, *D);
}
/* Transform direction vector from world to object space */
ccl_device_inline void object_inverse_dir_transform(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const ShaderData *sd,
ccl_private float3 *D)
{
#ifdef __OBJECT_MOTION__
if (sd->object_flag & SD_OBJECT_MOTION) {
*D = transform_direction_auto(&sd->ob_itfm_motion, *D);
return;
}
#endif
const Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_INVERSE_TRANSFORM);
*D = transform_direction(&tfm, *D);
}
/* Object center position */
ccl_device_inline float3 object_location(KernelGlobals kg, ccl_private const ShaderData *sd)
{
2017-02-16 06:24:13 -05:00
if (sd->object == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
#ifdef __OBJECT_MOTION__
if (sd->object_flag & SD_OBJECT_MOTION) {
return make_float3(sd->ob_tfm_motion.x.w, sd->ob_tfm_motion.y.w, sd->ob_tfm_motion.z.w);
}
#endif
2017-02-16 06:24:13 -05:00
Transform tfm = object_fetch_transform(kg, sd->object, OBJECT_TRANSFORM);
return make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
}
/* Color of the object */
ccl_device_inline float3 object_color(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
ccl_global const KernelObject *kobject = &kernel_data_fetch(objects, object);
return make_float3(kobject->color[0], kobject->color[1], kobject->color[2]);
}
/* Alpha of the object */
ccl_device_inline float object_alpha(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return 0.0f;
return kernel_data_fetch(objects, object).alpha;
}
/* Pass ID number of object */
ccl_device_inline float object_pass_id(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return 0.0f;
return kernel_data_fetch(objects, object).pass_id;
}
/* Lightgroup of lamp */
ccl_device_inline int lamp_lightgroup(KernelGlobals kg, int lamp)
{
if (lamp == LAMP_NONE)
return LIGHTGROUP_NONE;
return kernel_data_fetch(lights, lamp).lightgroup;
}
/* Lightgroup of object */
ccl_device_inline int object_lightgroup(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return LIGHTGROUP_NONE;
return kernel_data_fetch(objects, object).lightgroup;
}
/* Per lamp random number for shader variation */
ccl_device_inline float lamp_random_number(KernelGlobals kg, int lamp)
{
if (lamp == LAMP_NONE)
return 0.0f;
return kernel_data_fetch(lights, lamp).random;
}
/* Per object random number for shader variation */
ccl_device_inline float object_random_number(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return 0.0f;
return kernel_data_fetch(objects, object).random_number;
}
/* Particle ID from which this object was generated */
ccl_device_inline int object_particle_id(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return 0;
return kernel_data_fetch(objects, object).particle_index;
}
/* Generated texture coordinate on surface from where object was instanced */
ccl_device_inline float3 object_dupli_generated(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
ccl_global const KernelObject *kobject = &kernel_data_fetch(objects, object);
return make_float3(
kobject->dupli_generated[0], kobject->dupli_generated[1], kobject->dupli_generated[2]);
}
/* UV texture coordinate on surface from where object was instanced */
ccl_device_inline float3 object_dupli_uv(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return make_float3(0.0f, 0.0f, 0.0f);
ccl_global const KernelObject *kobject = &kernel_data_fetch(objects, object);
return make_float3(kobject->dupli_uv[0], kobject->dupli_uv[1], 0.0f);
}
/* Information about mesh for motion blurred triangles and curves */
ccl_device_inline void object_motion_info(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
int object,
ccl_private int *numsteps,
ccl_private int *numverts,
ccl_private int *numkeys)
{
if (numkeys) {
*numkeys = kernel_data_fetch(objects, object).numkeys;
}
if (numsteps)
*numsteps = kernel_data_fetch(objects, object).numsteps;
if (numverts)
*numverts = kernel_data_fetch(objects, object).numverts;
}
/* Offset to an objects patch map */
ccl_device_inline uint object_patch_map_offset(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return 0;
return kernel_data_fetch(objects, object).patch_map_offset;
}
/* Volume step size */
ccl_device_inline float object_volume_density(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE) {
return 1.0f;
}
return kernel_data_fetch(objects, object).volume_density;
}
ccl_device_inline float object_volume_step_size(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE) {
return kernel_data.background.volume_step_size;
}
return kernel_data_fetch(object_volume_step, object);
}
/* Pass ID for shader */
ccl_device int shader_pass_id(KernelGlobals kg, ccl_private const ShaderData *sd)
{
return kernel_data_fetch(shaders, (sd->shader & SHADER_MASK)).pass_id;
}
/* Cryptomatte ID */
ccl_device_inline float object_cryptomatte_id(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return 0.0f;
return kernel_data_fetch(objects, object).cryptomatte_object;
}
ccl_device_inline float object_cryptomatte_asset_id(KernelGlobals kg, int object)
{
if (object == OBJECT_NONE)
return 0;
return kernel_data_fetch(objects, object).cryptomatte_asset;
}
/* Particle data from which object was instanced */
ccl_device_inline uint particle_index(KernelGlobals kg, int particle)
{
return kernel_data_fetch(particles, particle).index;
}
ccl_device float particle_age(KernelGlobals kg, int particle)
{
return kernel_data_fetch(particles, particle).age;
}
ccl_device float particle_lifetime(KernelGlobals kg, int particle)
{
return kernel_data_fetch(particles, particle).lifetime;
}
ccl_device float particle_size(KernelGlobals kg, int particle)
{
return kernel_data_fetch(particles, particle).size;
}
ccl_device float4 particle_rotation(KernelGlobals kg, int particle)
{
return kernel_data_fetch(particles, particle).rotation;
}
ccl_device float3 particle_location(KernelGlobals kg, int particle)
{
return float4_to_float3(kernel_data_fetch(particles, particle).location);
}
ccl_device float3 particle_velocity(KernelGlobals kg, int particle)
{
return float4_to_float3(kernel_data_fetch(particles, particle).velocity);
}
ccl_device float3 particle_angular_velocity(KernelGlobals kg, int particle)
{
return float4_to_float3(kernel_data_fetch(particles, particle).angular_velocity);
}
/* Object intersection in BVH */
ccl_device_inline float3 bvh_clamp_direction(float3 dir)
{
const float ooeps = 8.271806E-25f;
return make_float3((fabsf(dir.x) > ooeps) ? dir.x : copysignf(ooeps, dir.x),
2014-05-05 02:19:08 +10:00
(fabsf(dir.y) > ooeps) ? dir.y : copysignf(ooeps, dir.y),
(fabsf(dir.z) > ooeps) ? dir.z : copysignf(ooeps, dir.z));
}
ccl_device_inline float3 bvh_inverse_direction(float3 dir)
{
return rcp(dir);
}
/* Transform ray into object space to enter static object in BVH */
ccl_device_inline float bvh_instance_push(KernelGlobals kg,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
int object,
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
ccl_private float3 *idir)
{
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
*P = transform_point(&tfm, ray->P);
float len;
*dir = bvh_clamp_direction(normalize_len(transform_direction(&tfm, ray->D), &len));
*idir = bvh_inverse_direction(*dir);
return len;
}
2021-02-05 16:23:34 +11:00
/* Transform ray to exit static object in BVH. */
ccl_device_inline float bvh_instance_pop(KernelGlobals kg,
int object,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
ccl_private float3 *idir,
float t)
{
if (t != FLT_MAX) {
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
t /= len(transform_direction(&tfm, ray->D));
}
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
return t;
}
Cycles: shadow function optimization for transparent shadows (CPU only). Old algorithm: Raytrace from one transparent surface to the next step by step. To minimize overhead in cases where we don't need transparent shadows, we first trace a regular shadow ray. We check if the hit primitive was potentially transparent, and only in that case start marching. this gives extra ray cast for the cases were we do want transparency. New algorithm: We trace a single ray. If it hits any opaque surface, or more than a given number of transparent surfaces is hit, then we consider the geometry to be entirely blocked. If not, all transparent surfaces will be recorded and we will shade them one by one to determine how much light is blocked. This all happens in one scene intersection function. Recording all hits works well in some cases but may be slower in others. If we have many semi-transparent hairs, one intersection may be faster because you'd be reinteresecting the same hairs a lot with each step otherwise. If however there is mostly binary transparency then we may be recording many unnecessary intersections when one of the first surfaces blocks all light. We found that this helps quite nicely in some scenes, on koro.blend this can give a 50% reduction in render time, on the pabellon barcelona scene and a forest scene with transparent leaves it was 30%. Some other files rendered maybe 1% or 2% slower, but this seems a reasonable tradeoff. Differential Revision: https://developer.blender.org/D473
2014-04-19 17:02:30 +02:00
/* Same as above, but returns scale factor to apply to multiple intersection distances */
ccl_device_inline void bvh_instance_pop_factor(KernelGlobals kg,
Cycles: shadow function optimization for transparent shadows (CPU only). Old algorithm: Raytrace from one transparent surface to the next step by step. To minimize overhead in cases where we don't need transparent shadows, we first trace a regular shadow ray. We check if the hit primitive was potentially transparent, and only in that case start marching. this gives extra ray cast for the cases were we do want transparency. New algorithm: We trace a single ray. If it hits any opaque surface, or more than a given number of transparent surfaces is hit, then we consider the geometry to be entirely blocked. If not, all transparent surfaces will be recorded and we will shade them one by one to determine how much light is blocked. This all happens in one scene intersection function. Recording all hits works well in some cases but may be slower in others. If we have many semi-transparent hairs, one intersection may be faster because you'd be reinteresecting the same hairs a lot with each step otherwise. If however there is mostly binary transparency then we may be recording many unnecessary intersections when one of the first surfaces blocks all light. We found that this helps quite nicely in some scenes, on koro.blend this can give a 50% reduction in render time, on the pabellon barcelona scene and a forest scene with transparent leaves it was 30%. Some other files rendered maybe 1% or 2% slower, but this seems a reasonable tradeoff. Differential Revision: https://developer.blender.org/D473
2014-04-19 17:02:30 +02:00
int object,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
ccl_private float3 *idir,
ccl_private float *t_fac)
Cycles: shadow function optimization for transparent shadows (CPU only). Old algorithm: Raytrace from one transparent surface to the next step by step. To minimize overhead in cases where we don't need transparent shadows, we first trace a regular shadow ray. We check if the hit primitive was potentially transparent, and only in that case start marching. this gives extra ray cast for the cases were we do want transparency. New algorithm: We trace a single ray. If it hits any opaque surface, or more than a given number of transparent surfaces is hit, then we consider the geometry to be entirely blocked. If not, all transparent surfaces will be recorded and we will shade them one by one to determine how much light is blocked. This all happens in one scene intersection function. Recording all hits works well in some cases but may be slower in others. If we have many semi-transparent hairs, one intersection may be faster because you'd be reinteresecting the same hairs a lot with each step otherwise. If however there is mostly binary transparency then we may be recording many unnecessary intersections when one of the first surfaces blocks all light. We found that this helps quite nicely in some scenes, on koro.blend this can give a 50% reduction in render time, on the pabellon barcelona scene and a forest scene with transparent leaves it was 30%. Some other files rendered maybe 1% or 2% slower, but this seems a reasonable tradeoff. Differential Revision: https://developer.blender.org/D473
2014-04-19 17:02:30 +02:00
{
Transform tfm = object_fetch_transform(kg, object, OBJECT_INVERSE_TRANSFORM);
*t_fac = 1.0f / len(transform_direction(&tfm, ray->D));
Cycles: shadow function optimization for transparent shadows (CPU only). Old algorithm: Raytrace from one transparent surface to the next step by step. To minimize overhead in cases where we don't need transparent shadows, we first trace a regular shadow ray. We check if the hit primitive was potentially transparent, and only in that case start marching. this gives extra ray cast for the cases were we do want transparency. New algorithm: We trace a single ray. If it hits any opaque surface, or more than a given number of transparent surfaces is hit, then we consider the geometry to be entirely blocked. If not, all transparent surfaces will be recorded and we will shade them one by one to determine how much light is blocked. This all happens in one scene intersection function. Recording all hits works well in some cases but may be slower in others. If we have many semi-transparent hairs, one intersection may be faster because you'd be reinteresecting the same hairs a lot with each step otherwise. If however there is mostly binary transparency then we may be recording many unnecessary intersections when one of the first surfaces blocks all light. We found that this helps quite nicely in some scenes, on koro.blend this can give a 50% reduction in render time, on the pabellon barcelona scene and a forest scene with transparent leaves it was 30%. Some other files rendered maybe 1% or 2% slower, but this seems a reasonable tradeoff. Differential Revision: https://developer.blender.org/D473
2014-04-19 17:02:30 +02:00
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
}
#ifdef __OBJECT_MOTION__
/* Transform ray into object space to enter motion blurred object in BVH */
ccl_device_inline float bvh_instance_motion_push(KernelGlobals kg,
int object,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
ccl_private float3 *idir,
ccl_private Transform *itfm)
{
object_fetch_transform_motion_test(kg, object, ray->time, itfm);
*P = transform_point(itfm, ray->P);
float len;
*dir = bvh_clamp_direction(normalize_len(transform_direction(itfm, ray->D), &len));
*idir = bvh_inverse_direction(*dir);
return len;
}
2021-02-05 16:23:34 +11:00
/* Transform ray to exit motion blurred object in BVH. */
ccl_device_inline float bvh_instance_motion_pop(KernelGlobals kg,
int object,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
ccl_private float3 *idir,
float t,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private Transform *itfm)
{
if (t != FLT_MAX) {
t /= len(transform_direction(itfm, ray->D));
}
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
return t;
}
Cycles: shadow function optimization for transparent shadows (CPU only). Old algorithm: Raytrace from one transparent surface to the next step by step. To minimize overhead in cases where we don't need transparent shadows, we first trace a regular shadow ray. We check if the hit primitive was potentially transparent, and only in that case start marching. this gives extra ray cast for the cases were we do want transparency. New algorithm: We trace a single ray. If it hits any opaque surface, or more than a given number of transparent surfaces is hit, then we consider the geometry to be entirely blocked. If not, all transparent surfaces will be recorded and we will shade them one by one to determine how much light is blocked. This all happens in one scene intersection function. Recording all hits works well in some cases but may be slower in others. If we have many semi-transparent hairs, one intersection may be faster because you'd be reinteresecting the same hairs a lot with each step otherwise. If however there is mostly binary transparency then we may be recording many unnecessary intersections when one of the first surfaces blocks all light. We found that this helps quite nicely in some scenes, on koro.blend this can give a 50% reduction in render time, on the pabellon barcelona scene and a forest scene with transparent leaves it was 30%. Some other files rendered maybe 1% or 2% slower, but this seems a reasonable tradeoff. Differential Revision: https://developer.blender.org/D473
2014-04-19 17:02:30 +02:00
/* Same as above, but returns scale factor to apply to multiple intersection distances */
ccl_device_inline void bvh_instance_motion_pop_factor(KernelGlobals kg,
int object,
Cycles: Kernel address space changes for MSL This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation. MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness. The vast majority of deltas in this patch fall into one of two cases: - Ensuring ccl_private is specified for thread-local pointer types - Ensuring ccl_global is specified for device-wide pointer types Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant. In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture. The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation. Ref T92212 Reviewed By: brecht Maniphest Tasks: T92212 Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
ccl_private const Ray *ray,
ccl_private float3 *P,
ccl_private float3 *dir,
ccl_private float3 *idir,
ccl_private float *t_fac,
ccl_private Transform *itfm)
Cycles: shadow function optimization for transparent shadows (CPU only). Old algorithm: Raytrace from one transparent surface to the next step by step. To minimize overhead in cases where we don't need transparent shadows, we first trace a regular shadow ray. We check if the hit primitive was potentially transparent, and only in that case start marching. this gives extra ray cast for the cases were we do want transparency. New algorithm: We trace a single ray. If it hits any opaque surface, or more than a given number of transparent surfaces is hit, then we consider the geometry to be entirely blocked. If not, all transparent surfaces will be recorded and we will shade them one by one to determine how much light is blocked. This all happens in one scene intersection function. Recording all hits works well in some cases but may be slower in others. If we have many semi-transparent hairs, one intersection may be faster because you'd be reinteresecting the same hairs a lot with each step otherwise. If however there is mostly binary transparency then we may be recording many unnecessary intersections when one of the first surfaces blocks all light. We found that this helps quite nicely in some scenes, on koro.blend this can give a 50% reduction in render time, on the pabellon barcelona scene and a forest scene with transparent leaves it was 30%. Some other files rendered maybe 1% or 2% slower, but this seems a reasonable tradeoff. Differential Revision: https://developer.blender.org/D473
2014-04-19 17:02:30 +02:00
{
*t_fac = 1.0f / len(transform_direction(itfm, ray->D));
Cycles: shadow function optimization for transparent shadows (CPU only). Old algorithm: Raytrace from one transparent surface to the next step by step. To minimize overhead in cases where we don't need transparent shadows, we first trace a regular shadow ray. We check if the hit primitive was potentially transparent, and only in that case start marching. this gives extra ray cast for the cases were we do want transparency. New algorithm: We trace a single ray. If it hits any opaque surface, or more than a given number of transparent surfaces is hit, then we consider the geometry to be entirely blocked. If not, all transparent surfaces will be recorded and we will shade them one by one to determine how much light is blocked. This all happens in one scene intersection function. Recording all hits works well in some cases but may be slower in others. If we have many semi-transparent hairs, one intersection may be faster because you'd be reinteresecting the same hairs a lot with each step otherwise. If however there is mostly binary transparency then we may be recording many unnecessary intersections when one of the first surfaces blocks all light. We found that this helps quite nicely in some scenes, on koro.blend this can give a 50% reduction in render time, on the pabellon barcelona scene and a forest scene with transparent leaves it was 30%. Some other files rendered maybe 1% or 2% slower, but this seems a reasonable tradeoff. Differential Revision: https://developer.blender.org/D473
2014-04-19 17:02:30 +02:00
*P = ray->P;
*dir = bvh_clamp_direction(ray->D);
*idir = bvh_inverse_direction(*dir);
}
#endif
/* TODO: This can be removed when we know if no devices will require explicit
* address space qualifiers for this case. */
2015-05-09 19:34:30 +05:00
#define object_position_transform_auto object_position_transform
#define object_dir_transform_auto object_dir_transform
#define object_normal_transform_auto object_normal_transform
2015-05-09 19:34:30 +05:00
CCL_NAMESPACE_END