Cycles: introduce dual types to replace some uses of dfdx/dfdy/differentials.
No functional change expected.
Pull Request: https://projects.blender.org/blender/blender/pulls/143178
This commit is contained in:
@@ -400,6 +400,7 @@ set(SRC_UTIL_HEADERS
|
||||
../util/math_int3.h
|
||||
../util/math_int4.h
|
||||
../util/math_int8.h
|
||||
../util/math_dual.h
|
||||
../util/projection.h
|
||||
../util/projection_inverse.h
|
||||
../util/rect.h
|
||||
@@ -424,6 +425,7 @@ set(SRC_UTIL_HEADERS
|
||||
../util/types_uint3.h
|
||||
../util/types_uint4.h
|
||||
../util/types_ushort4.h
|
||||
../util/types_dual.h
|
||||
)
|
||||
|
||||
set(LIB
|
||||
|
||||
@@ -46,12 +46,13 @@ ccl_device_inline T curve_attribute_dfdy(const ccl_private differential &du,
|
||||
/* Read attributes on various curve elements, and compute the partial derivatives if requested. */
|
||||
|
||||
template<typename T>
|
||||
ccl_device T curve_attribute(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
const AttributeDescriptor desc,
|
||||
ccl_private T *dfdx,
|
||||
ccl_private T *dfdy)
|
||||
ccl_device dual<T> curve_attribute(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
const AttributeDescriptor desc,
|
||||
const bool dx = false,
|
||||
const bool dy = false)
|
||||
{
|
||||
dual<T> result;
|
||||
if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) {
|
||||
const KernelCurve curve = kernel_data_fetch(curves, sd->prim);
|
||||
const int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type);
|
||||
@@ -61,34 +62,27 @@ ccl_device T curve_attribute(KernelGlobals kg,
|
||||
const T f1 = attribute_data_fetch<T>(kg, desc.offset + k1);
|
||||
|
||||
# ifdef __RAY_DIFFERENTIALS__
|
||||
if (dfdx) {
|
||||
*dfdx = curve_attribute_dfdx(sd->du, f0, f1);
|
||||
if (dx) {
|
||||
result.dx = curve_attribute_dfdx(sd->du, f0, f1);
|
||||
}
|
||||
if (dfdy) {
|
||||
*dfdy = curve_attribute_dfdy(sd->du, f0, f1);
|
||||
if (dy) {
|
||||
result.dy = curve_attribute_dfdy(sd->du, f0, f1);
|
||||
}
|
||||
# endif
|
||||
|
||||
return mix(f0, f1, sd->u);
|
||||
result.val = mix(f0, f1, sd->u);
|
||||
return result;
|
||||
}
|
||||
|
||||
/* idea: we can't derive any useful differentials here, but for tiled
|
||||
* mipmap image caching it would be useful to avoid reading the highest
|
||||
* detail level always. maybe a derivative based on the hair density
|
||||
* could be computed somehow? */
|
||||
# ifdef __RAY_DIFFERENTIALS__
|
||||
if (dfdx) {
|
||||
*dfdx = make_zero<T>();
|
||||
}
|
||||
if (dfdy) {
|
||||
*dfdy = make_zero<T>();
|
||||
}
|
||||
# endif
|
||||
|
||||
if (desc.element == ATTR_ELEMENT_CURVE) {
|
||||
return attribute_data_fetch<T>(kg, desc.offset + sd->prim);
|
||||
return dual<T>(attribute_data_fetch<T>(kg, desc.offset + sd->prim));
|
||||
}
|
||||
return make_zero<T>();
|
||||
return make_zero<dual<T>>();
|
||||
}
|
||||
|
||||
/* Curve thickness */
|
||||
@@ -127,9 +121,7 @@ ccl_device float curve_random(KernelGlobals kg, const ccl_private ShaderData *sd
|
||||
{
|
||||
if (sd->type & PRIMITIVE_CURVE) {
|
||||
const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_CURVE_RANDOM);
|
||||
return (desc.offset != ATTR_STD_NOT_FOUND) ?
|
||||
curve_attribute<float>(kg, sd, desc, nullptr, nullptr) :
|
||||
0.0f;
|
||||
return (desc.offset != ATTR_STD_NOT_FOUND) ? curve_attribute<float>(kg, sd, desc).val : 0.0f;
|
||||
}
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
@@ -130,9 +130,10 @@ ccl_device_inline Transform lamp_get_inverse_transform(KernelGlobals kg,
|
||||
|
||||
/* Transform position from object to world space */
|
||||
|
||||
template<class T>
|
||||
ccl_device_inline void object_position_transform(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
ccl_private float3 *P)
|
||||
ccl_private T *P)
|
||||
{
|
||||
#ifdef __OBJECT_MOTION__
|
||||
if (sd->object_flag & SD_OBJECT_MOTION) {
|
||||
|
||||
@@ -22,25 +22,16 @@ CCL_NAMESPACE_BEGIN
|
||||
/* Reading attributes on various point elements */
|
||||
|
||||
/* Read an attribute of type T for the current point primitive.
 *
 * This is a rendered diff: removed and added lines follow each other without +/- markers.
 * For ATTR_ELEMENT_VERTEX the value is fetched at `desc.offset + sd->prim`; any other element
 * type yields a zero value. No derivative is ever computed for points. */
template<typename T>
|
||||
/* Removed signature: returns the plain value and takes optional derivative output pointers. */
ccl_device T point_attribute(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
const AttributeDescriptor desc,
|
||||
ccl_private T *dx,
|
||||
ccl_private T *dy)
|
||||
/* Added signature: returns dual<T>; the two derivative request flags are unnamed and unused. */
ccl_device dual<T> point_attribute(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
const AttributeDescriptor desc,
|
||||
const bool /* dx */ = false,
|
||||
const bool /* dy */ = false)
|
||||
{
|
||||
/* Removed: zero whichever derivative outputs the caller supplied. */
# ifdef __RAY_DIFFERENTIALS__
|
||||
if (dx) {
|
||||
*dx = make_zero<T>();
|
||||
}
|
||||
if (dy) {
|
||||
*dy = make_zero<T>();
|
||||
}
|
||||
# endif
|
||||
|
||||
if (desc.element == ATTR_ELEMENT_VERTEX) {
|
||||
/* Removed / added return for the per-point value.
 * NOTE(review): the added form presumably leaves the derivative members of dual<T> at zero;
 * that depends on dual<T>'s single-argument constructor, which is not visible here. */
return attribute_data_fetch<T>(kg, desc.offset + sd->prim);
|
||||
return dual<T>(attribute_data_fetch<T>(kg, desc.offset + sd->prim));
|
||||
}
|
||||
/* Removed / added fallback for unsupported element types. */
return make_zero<T>();
|
||||
return make_zero<dual<T>>();
|
||||
}
|
||||
|
||||
/* Point position */
|
||||
@@ -90,9 +81,7 @@ ccl_device float point_random(KernelGlobals kg, const ccl_private ShaderData *sd
|
||||
{
|
||||
if (sd->type & PRIMITIVE_POINT) {
|
||||
const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POINT_RANDOM);
|
||||
return (desc.offset != ATTR_STD_NOT_FOUND) ?
|
||||
point_attribute<float>(kg, sd, desc, nullptr, nullptr) :
|
||||
0.0f;
|
||||
return (desc.offset != ATTR_STD_NOT_FOUND) ? point_attribute<float>(kg, sd, desc).val : 0.0f;
|
||||
}
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
@@ -29,44 +29,31 @@ CCL_NAMESPACE_BEGIN
|
||||
* heavy volume interpolation code. */
|
||||
|
||||
/* Read a surface attribute of type T for the current shading point, dispatching on the
 * attribute element and on the primitive type stored in `sd->type`.
 *
 * This is a rendered diff: removed and added lines follow each other without +/- markers.
 * - Object/mesh attributes are fetched directly at `desc.offset`.
 * - Triangles, curves (only with __HAIR__) and points (only with __POINTCLOUD__) forward to
 *   triangle_attribute / curve_attribute / point_attribute.
 * - Anything else yields a zero value. */
template<typename T>
|
||||
/* Removed signature: plain value returned, derivatives written through optional pointers. */
ccl_device_forceinline T primitive_surface_attribute(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
const AttributeDescriptor desc,
|
||||
ccl_private T *dfdx,
|
||||
ccl_private T *dfdy)
|
||||
/* Added signature: dual<T> returned; `dx` / `dy` are forwarded to the per-primitive readers. */
ccl_device_forceinline dual<T> primitive_surface_attribute(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
const AttributeDescriptor desc,
|
||||
const bool dx = false,
|
||||
const bool dy = false)
|
||||
{
|
||||
if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) {
|
||||
/* Removed: explicit zeroing of the derivative outputs for object/mesh attributes. */
if (dfdx) {
|
||||
*dfdx = make_zero<T>();
|
||||
}
|
||||
if (dfdy) {
|
||||
*dfdy = make_zero<T>();
|
||||
}
|
||||
|
||||
/* Removed / added return of the single stored value. */
return attribute_data_fetch<T>(kg, desc.offset);
|
||||
return dual<T>(attribute_data_fetch<T>(kg, desc.offset));
|
||||
}
|
||||
|
||||
if (sd->type & PRIMITIVE_TRIANGLE) {
|
||||
return triangle_attribute<T>(kg, sd, desc, dfdx, dfdy);
|
||||
return triangle_attribute<T>(kg, sd, desc, dx, dy);
|
||||
}
|
||||
#ifdef __HAIR__
|
||||
if (sd->type & PRIMITIVE_CURVE) {
|
||||
return curve_attribute<T>(kg, sd, desc, dfdx, dfdy);
|
||||
return curve_attribute<T>(kg, sd, desc, dx, dy);
|
||||
}
|
||||
#endif
|
||||
#ifdef __POINTCLOUD__
|
||||
else if (sd->type & PRIMITIVE_POINT) {
|
||||
return point_attribute<T>(kg, sd, desc, dfdx, dfdy);
|
||||
return point_attribute<T>(kg, sd, desc, dx, dy);
|
||||
}
|
||||
#endif
|
||||
/* This `else` binds to whichever `if` / `else if` above survives preprocessing. */
else {
|
||||
/* Removed: zero the derivative outputs for unknown primitive types. */
if (dfdx) {
|
||||
*dfdx = make_zero<T>();
|
||||
}
|
||||
if (dfdy) {
|
||||
*dfdy = make_zero<T>();
|
||||
}
|
||||
/* Removed / added zero result. */
return make_zero<T>();
|
||||
return make_zero<dual<T>>();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,7 +92,7 @@ ccl_device_forceinline float3 primitive_uv(KernelGlobals kg, const ccl_private S
|
||||
return make_float3(0.0f, 0.0f, 0.0f);
|
||||
}
|
||||
|
||||
const float2 uv = primitive_surface_attribute<float2>(kg, sd, desc, nullptr, nullptr);
|
||||
const float2 uv = primitive_surface_attribute<float2>(kg, sd, desc).val;
|
||||
return make_float3(uv.x, uv.y, 1.0f);
|
||||
}
|
||||
|
||||
@@ -124,9 +111,8 @@ ccl_device bool primitive_ptex(KernelGlobals kg,
|
||||
return false;
|
||||
}
|
||||
|
||||
const float3 uv3 = primitive_surface_attribute<float3>(kg, sd, desc_uv, nullptr, nullptr);
|
||||
const float face_id_f = primitive_surface_attribute<float>(
|
||||
kg, sd, desc_face_id, nullptr, nullptr);
|
||||
const float3 uv3 = primitive_surface_attribute<float3>(kg, sd, desc_uv).val;
|
||||
const float face_id_f = primitive_surface_attribute<float>(kg, sd, desc_face_id).val;
|
||||
|
||||
*uv = make_float2(uv3.x, uv3.y);
|
||||
*face_id = (int)face_id_f;
|
||||
@@ -152,7 +138,7 @@ ccl_device float3 primitive_tangent(KernelGlobals kg, ccl_private ShaderData *sd
|
||||
const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED);
|
||||
|
||||
if (desc.offset != ATTR_STD_NOT_FOUND) {
|
||||
float3 data = primitive_surface_attribute<float3>(kg, sd, desc, nullptr, nullptr);
|
||||
float3 data = primitive_surface_attribute<float3>(kg, sd, desc).val;
|
||||
data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f);
|
||||
object_normal_transform(kg, sd, &data);
|
||||
return cross(sd->N, normalize(cross(data, sd->N)));
|
||||
@@ -211,11 +197,9 @@ ccl_device_forceinline float4 primitive_motion_vector(KernelGlobals kg,
|
||||
|
||||
#if defined(__HAIR__) || defined(__POINTCLOUD__)
|
||||
if (is_curve_or_point) {
|
||||
motion_pre = make_float3(
|
||||
primitive_surface_attribute<float4>(kg, sd, desc, nullptr, nullptr));
|
||||
motion_pre = make_float3(primitive_surface_attribute<float4>(kg, sd, desc).val);
|
||||
desc.offset += numverts;
|
||||
motion_post = make_float3(
|
||||
primitive_surface_attribute<float4>(kg, sd, desc, nullptr, nullptr));
|
||||
motion_post = make_float3(primitive_surface_attribute<float4>(kg, sd, desc).val);
|
||||
|
||||
/* Curve */
|
||||
if ((sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) {
|
||||
@@ -228,9 +212,9 @@ ccl_device_forceinline float4 primitive_motion_vector(KernelGlobals kg,
|
||||
if (sd->type & PRIMITIVE_TRIANGLE)
|
||||
{
|
||||
/* Triangle */
|
||||
motion_pre = triangle_attribute<float3>(kg, sd, desc, nullptr, nullptr);
|
||||
motion_pre = triangle_attribute<float3>(kg, sd, desc).val;
|
||||
desc.offset += numverts;
|
||||
motion_post = triangle_attribute<float3>(kg, sd, desc, nullptr, nullptr);
|
||||
motion_post = triangle_attribute<float3>(kg, sd, desc).val;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -222,12 +222,13 @@ ccl_device_inline T triangle_attribute_dfdy(const ccl_private differential &du,
|
||||
/* Read attributes on various triangle elements, and compute the partial derivatives if requested.
|
||||
*/
|
||||
template<typename T>
|
||||
ccl_device T triangle_attribute(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
const AttributeDescriptor desc,
|
||||
ccl_private T *dfdx,
|
||||
ccl_private T *dfdy)
|
||||
ccl_device dual<T> triangle_attribute(KernelGlobals kg,
|
||||
const ccl_private ShaderData *sd,
|
||||
const AttributeDescriptor desc,
|
||||
const bool dx = false,
|
||||
const bool dy = false)
|
||||
{
|
||||
dual<T> result;
|
||||
if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER |
|
||||
ATTR_ELEMENT_CORNER_BYTE))
|
||||
{
|
||||
@@ -256,29 +257,22 @@ ccl_device T triangle_attribute(KernelGlobals kg,
|
||||
}
|
||||
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dfdx) {
|
||||
*dfdx = triangle_attribute_dfdx(sd->du, sd->dv, f0, f1, f2);
|
||||
if (dx) {
|
||||
result.dx = triangle_attribute_dfdx(sd->du, sd->dv, f0, f1, f2);
|
||||
}
|
||||
if (dfdy) {
|
||||
*dfdy = triangle_attribute_dfdy(sd->du, sd->dv, f0, f1, f2);
|
||||
if (dy) {
|
||||
result.dy = triangle_attribute_dfdy(sd->du, sd->dv, f0, f1, f2);
|
||||
}
|
||||
#endif
|
||||
|
||||
return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
|
||||
result.val = sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0;
|
||||
return result;
|
||||
}
|
||||
#ifdef __RAY_DIFFERENTIALS__
|
||||
if (dfdx) {
|
||||
*dfdx = make_zero<T>();
|
||||
}
|
||||
if (dfdy) {
|
||||
*dfdy = make_zero<T>();
|
||||
}
|
||||
#endif
|
||||
|
||||
if (desc.element == ATTR_ELEMENT_FACE) {
|
||||
return attribute_data_fetch<T>(kg, desc.offset + sd->prim);
|
||||
return dual<T>(attribute_data_fetch<T>(kg, desc.offset + sd->prim));
|
||||
}
|
||||
return make_zero<T>();
|
||||
return make_zero<dual<T>>();
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -222,18 +222,16 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg,
|
||||
const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED);
|
||||
kernel_assert(desc.offset != ATTR_STD_NOT_FOUND);
|
||||
|
||||
differential3 tmp_dP;
|
||||
sd->P = primitive_surface_attribute<float3>(kg, sd, desc, &tmp_dP.dx, &tmp_dP.dy);
|
||||
dual3 P = primitive_surface_attribute<float3>(kg, sd, desc, true, true);
|
||||
|
||||
object_position_transform(kg, sd, &sd->P);
|
||||
object_dir_transform(kg, sd, &tmp_dP.dx);
|
||||
object_dir_transform(kg, sd, &tmp_dP.dy);
|
||||
object_position_transform(kg, sd, &P);
|
||||
|
||||
sd->dP = differential_make_compact(tmp_dP);
|
||||
sd->P = P.val;
|
||||
sd->dP = differential_make_compact(P);
|
||||
|
||||
globals.P = sd->P;
|
||||
globals.dPdx = tmp_dP.dx;
|
||||
globals.dPdy = tmp_dP.dy;
|
||||
globals.dPdx = P.dx;
|
||||
globals.dPdy = P.dy;
|
||||
}
|
||||
|
||||
/* Execute bump shader. */
|
||||
|
||||
@@ -409,118 +409,63 @@ bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals * /*sg*/,
|
||||
return false;
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline bool set_attribute(
|
||||
const T v, const T dx, const T dy, TypeDesc type, bool derivatives, void *val);
|
||||
|
||||
/* Store a float result into `val`, viewed as a float array: slot 0 receives `v`; slots 1 and 2
 * receive `dx` and `dy` only when `derivatives` is true.
 * NOTE(review): the added call sites in this diff pass (value, derivatives, val) only, so this
 * five-argument form looks like the removed side of the change; confirm against the repository. */
inline void set_data_float(
|
||||
const float v, const float dx, const float dy, bool derivatives, void *val)
|
||||
{
|
||||
float *fval = static_cast<float *>(val);
|
||||
fval[0] = v;
|
||||
if (derivatives) {
|
||||
fval[1] = dx;
|
||||
fval[2] = dy;
|
||||
}
|
||||
}
|
||||
|
||||
/* Store a float3 result into `val`, viewed as a float array: slots 0-2 receive `v.x/y/z`; when
 * `derivatives` is true, slots 3-5 receive `dx` and slots 6-8 receive `dy`.
 * NOTE(review): the added call sites in this diff pass (value, derivatives, val) only, so this
 * five-argument form looks like the removed side of the change; confirm against the repository. */
inline void set_data_float3(
|
||||
const float3 v, const float3 dx, const float3 dy, bool derivatives, void *val)
|
||||
{
|
||||
float *fval = static_cast<float *>(val);
|
||||
fval[0] = v.x;
|
||||
fval[1] = v.y;
|
||||
fval[2] = v.z;
|
||||
if (derivatives) {
|
||||
fval[3] = dx.x;
|
||||
fval[4] = dx.y;
|
||||
fval[5] = dx.z;
|
||||
fval[6] = dy.x;
|
||||
fval[7] = dy.y;
|
||||
fval[8] = dy.z;
|
||||
}
|
||||
}
|
||||
|
||||
/* Store a float4 result into `val`, viewed as a float array: slots 0-3 receive `v.x/y/z/w`; when
 * `derivatives` is true, slots 4-7 receive `dx` and slots 8-11 receive `dy`.
 * NOTE(review): the added call sites in this diff pass (value, derivatives, val) only, so this
 * five-argument form looks like the removed side of the change; confirm against the repository. */
inline void set_data_float4(
|
||||
const float4 v, const float4 dx, const float4 dy, bool derivatives, void *val)
|
||||
{
|
||||
float *fval = static_cast<float *>(val);
|
||||
fval[0] = v.x;
|
||||
fval[1] = v.y;
|
||||
fval[2] = v.z;
|
||||
fval[3] = v.w;
|
||||
if (derivatives) {
|
||||
fval[4] = dx.x;
|
||||
fval[5] = dx.y;
|
||||
fval[6] = dx.z;
|
||||
fval[7] = dx.w;
|
||||
fval[8] = dy.x;
|
||||
fval[9] = dy.y;
|
||||
fval[10] = dy.z;
|
||||
fval[11] = dy.w;
|
||||
}
|
||||
}
|
||||
|
||||
/* Write a scalar float attribute (with derivatives) into `val`, converted to the requested
 * OSL type. Returns true when `type` is TypeFloatArray4, one of the point/vector/normal/color
 * triples, or TypeFloat; returns false without writing for any other type.
 *
 * This is a rendered diff: removed and added lines follow each other without +/- markers. */
/* Removed signature: value and derivatives passed as three separate floats. */
ccl_device_template_spec bool set_attribute(
|
||||
const float v, const float dx, const float dy, TypeDesc type, bool derivatives, void *val)
|
||||
/* Added signature: value and derivatives bundled in a dual1. */
ccl_device_template_spec bool set_attribute(const dual1 v,
|
||||
TypeDesc type,
|
||||
bool derivatives,
|
||||
void *val)
|
||||
{
|
||||
if (type == TypeFloatArray4) {
|
||||
/* Removed: replicate the scalar into xyz with w = 1 for the value and w = 0 for derivatives.
 * NOTE(review): the added one-liner relies on dual overloads of make_float3 / make_float4
 * that are not visible here; presumably they produce the same w components. */
set_data_float4(make_float4(v, v, v, 1.0f),
|
||||
make_float4(dx, dx, dx, 0.0f),
|
||||
make_float4(dy, dy, dy, 0.0f),
|
||||
derivatives,
|
||||
val);
|
||||
set_data_float4(make_float4(make_float3(v)), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if (type == TypePoint || type == TypeVector || type == TypeNormal || type == TypeColor) {
|
||||
set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val);
|
||||
set_data_float3(make_float3(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if (type == TypeFloat) {
|
||||
set_data_float(v, dx, dy, derivatives, val);
|
||||
set_data_float(v, derivatives, val);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Write a float2 attribute (with derivatives) into `val`, converted to the requested OSL type.
 * Returns true when `type` is TypeFloatArray4, one of the point/vector/normal/color triples, or
 * TypeFloat (the components are averaged); returns false without writing for any other type.
 *
 * This is a rendered diff: removed and added lines follow each other without +/- markers. */
/* Removed signature: value and derivatives passed as three separate float2 arguments. */
ccl_device_template_spec bool set_attribute(
|
||||
const float2 v, const float2 dx, const float2 dy, TypeDesc type, bool derivatives, void *val)
|
||||
/* Added signature: value and derivatives bundled in a dual2. */
ccl_device_template_spec bool set_attribute(const dual2 v,
|
||||
TypeDesc type,
|
||||
bool derivatives,
|
||||
void *val)
|
||||
{
|
||||
if (type == TypeFloatArray4) {
|
||||
/* Removed: pad to (x, y, 0, 1) for the value and (x, y, 0, 0) for the derivatives.
 * NOTE(review): the added one-liner relies on dual overloads of make_float3 / make_float4
 * that are not visible here; presumably they produce the same padding. */
set_data_float4(make_float4(v.x, v.y, 0.0f, 1.0f),
|
||||
make_float4(dx.x, dx.y, 0.0f, 0.0f),
|
||||
make_float4(dy.x, dy.y, 0.0f, 0.0f),
|
||||
derivatives,
|
||||
val);
|
||||
set_data_float4(make_float4(make_float3(v)), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if (type == TypePoint || type == TypeVector || type == TypeNormal || type == TypeColor) {
|
||||
set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val);
|
||||
set_data_float3(make_float3(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if (type == TypeFloat) {
|
||||
set_data_float(average(v), average(dx), average(dy), derivatives, val);
|
||||
set_data_float(average(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
ccl_device_template_spec bool set_attribute(
|
||||
const float3 v, const float3 dx, const float3 dy, TypeDesc type, bool derivatives, void *val)
|
||||
ccl_device_template_spec bool set_attribute(const dual3 v,
|
||||
TypeDesc type,
|
||||
bool derivatives,
|
||||
void *val)
|
||||
{
|
||||
if (type == TypeFloatArray4) {
|
||||
set_data_float4(
|
||||
make_float4(v, 1.0f), make_float4(dx, 0.0f), make_float4(dy, 0.0f), derivatives, val);
|
||||
set_data_float4(make_float4(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if (type == TypePoint || type == TypeVector || type == TypeNormal || type == TypeColor) {
|
||||
set_data_float3(v, dx, dy, derivatives, val);
|
||||
set_data_float3(v, derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if (type == TypeFloat) {
|
||||
set_data_float(average(v), average(dx), average(dy), derivatives, val);
|
||||
set_data_float(average(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -535,23 +480,21 @@ ccl_device_template_spec bool set_attribute(
|
||||
* this for the correct operation of the Attribute node.
|
||||
*/
|
||||
|
||||
ccl_device_template_spec bool set_attribute(
|
||||
const float4 v, const float4 dx, const float4 dy, TypeDesc type, bool derivatives, void *val)
|
||||
ccl_device_template_spec bool set_attribute(const dual4 v,
|
||||
TypeDesc type,
|
||||
bool derivatives,
|
||||
void *val)
|
||||
{
|
||||
if (type == TypeFloatArray4) {
|
||||
set_data_float4(v, dx, dy, derivatives, val);
|
||||
set_data_float4(v, derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if (type == TypePoint || type == TypeVector || type == TypeNormal || type == TypeColor) {
|
||||
set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val);
|
||||
set_data_float3(make_float3(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if (type == TypeFloat) {
|
||||
set_data_float(average(make_float3(v)),
|
||||
average(make_float3(dx)),
|
||||
average(make_float3(dy)),
|
||||
derivatives,
|
||||
val);
|
||||
set_data_float(average(make_float3(v)), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@@ -560,7 +503,7 @@ ccl_device_template_spec bool set_attribute(
|
||||
/* Convenience overload for a value that carries no derivatives: forwards to the
 * derivative-aware set_attribute and returns its result.
 * This is a rendered diff: the first return is the removed line (explicit zero derivatives), the
 * second is the added line (value wrapped in dual<T>). */
template<typename T>
|
||||
ccl_device_inline bool set_attribute(const T f, const TypeDesc type, bool derivatives, void *val)
|
||||
{
|
||||
return set_attribute(f, make_zero<T>(), make_zero<T>(), type, derivatives, val);
|
||||
return set_attribute(dual<T>(f), type, derivatives, val);
|
||||
}
|
||||
|
||||
ccl_device_template_spec bool set_attribute(const int i,
|
||||
@@ -610,17 +553,9 @@ static bool set_attribute_float3_3(const float3 P[3], TypeDesc type, bool deriva
|
||||
if (type.vecsemantics == TypeDesc::POINT && type.arraylen >= 3) {
|
||||
float *fval = (float *)val;
|
||||
|
||||
fval[0] = P[0].x;
|
||||
fval[1] = P[0].y;
|
||||
fval[2] = P[0].z;
|
||||
|
||||
fval[3] = P[1].x;
|
||||
fval[4] = P[1].y;
|
||||
fval[5] = P[1].z;
|
||||
|
||||
fval[6] = P[2].x;
|
||||
fval[7] = P[2].y;
|
||||
fval[8] = P[2].z;
|
||||
copy_v3_v3(fval, P[0]);
|
||||
copy_v3_v3(fval + 3, P[1]);
|
||||
copy_v3_v3(fval + 6, P[2]);
|
||||
|
||||
if (type.arraylen > 3) {
|
||||
memset(fval + 3 * 3, 0, sizeof(float) * 3 * (type.arraylen - 3));
|
||||
@@ -653,20 +588,17 @@ inline bool get_object_attribute_impl(const ThreadKernelGlobalsCPU *kg,
|
||||
bool derivatives,
|
||||
void *val)
|
||||
{
|
||||
T v;
|
||||
T dx = make_zero<T>();
|
||||
T dy = make_zero<T>();
|
||||
dual<T> data;
|
||||
#ifdef __VOLUME__
|
||||
if (primitive_is_volume_attribute(sd)) {
|
||||
v = primitive_volume_attribute<T>(kg, sd, desc, true);
|
||||
data.val = primitive_volume_attribute<T>(kg, sd, desc, true);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
v = primitive_surface_attribute<T>(
|
||||
kg, sd, desc, derivatives ? &dx : nullptr, derivatives ? &dy : nullptr);
|
||||
data = primitive_surface_attribute<T>(kg, sd, desc, derivatives, derivatives);
|
||||
}
|
||||
return set_attribute(v, dx, dy, type, derivatives, val);
|
||||
return set_attribute(data, type, derivatives, val);
|
||||
}
|
||||
|
||||
static bool get_object_attribute(const ThreadKernelGlobalsCPU *kg,
|
||||
@@ -867,11 +799,11 @@ bool OSLRenderServices::get_object_standard_attribute(
|
||||
return false;
|
||||
}
|
||||
if (name == u_bump_map_normal) {
|
||||
float3 f[3];
|
||||
dual3 f;
|
||||
if (!attribute_bump_map_normal(kg, sd, f)) {
|
||||
return false;
|
||||
}
|
||||
return set_attribute(f[0], f[1], f[2], type, derivatives, val);
|
||||
return set_attribute(f, type, derivatives, val);
|
||||
}
|
||||
return get_background_attribute(globals, name, type, derivatives, val);
|
||||
}
|
||||
@@ -936,29 +868,24 @@ bool OSLRenderServices::get_background_attribute(
|
||||
|
||||
if (name == u_ndc) {
|
||||
/* NDC coordinates with special exception for orthographic projection. */
|
||||
float3 ndc[3];
|
||||
dual3 ndc;
|
||||
|
||||
if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
|
||||
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
|
||||
{
|
||||
ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
|
||||
|
||||
if (derivatives) {
|
||||
ndc[1] = zero_float3();
|
||||
ndc[2] = zero_float3();
|
||||
}
|
||||
ndc.val = camera_world_to_ndc(kg, sd, sd->ray_P);
|
||||
}
|
||||
else {
|
||||
ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
|
||||
ndc.val = camera_world_to_ndc(kg, sd, sd->P);
|
||||
|
||||
if (derivatives) {
|
||||
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
|
||||
ndc[1] = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc[0];
|
||||
ndc[2] = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc[0];
|
||||
ndc.dx = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc.val;
|
||||
ndc.dy = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc.val;
|
||||
}
|
||||
}
|
||||
|
||||
return set_attribute(ndc[0], ndc[1], ndc[2], type, derivatives, val);
|
||||
return set_attribute(ndc, type, derivatives, val);
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -1682,17 +1609,17 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg,
|
||||
}
|
||||
if (name == u_P) {
|
||||
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
|
||||
return set_attribute(sd->P, dP.dx, dP.dy, type, derivatives, val);
|
||||
return set_attribute(dual3(sd->P, dP.dx, dP.dy), type, derivatives, val);
|
||||
}
|
||||
if (name == u_I) {
|
||||
const differential3 dI = differential_from_compact(sd->wi, sd->dI);
|
||||
return set_attribute(sd->wi, dI.dx, dI.dy, type, derivatives, val);
|
||||
return set_attribute(dual3(sd->wi, dI.dx, dI.dy), type, derivatives, val);
|
||||
}
|
||||
if (name == u_u) {
|
||||
return set_attribute(sd->u, sd->du.dx, sd->du.dy, type, derivatives, val);
|
||||
return set_attribute(dual1(sd->u, sd->du.dx, sd->du.dy), type, derivatives, val);
|
||||
}
|
||||
if (name == u_v) {
|
||||
return set_attribute(sd->v, sd->dv.dx, sd->dv.dy, type, derivatives, val);
|
||||
return set_attribute(dual1(sd->v, sd->dv.dx, sd->dv.dy), type, derivatives, val);
|
||||
}
|
||||
|
||||
return get_attribute(sg, derivatives, u_empty, type, name, val);
|
||||
|
||||
@@ -425,67 +425,7 @@ ccl_device_extern bool osl_get_inverse_matrix(ccl_private ShaderGlobals *sg,
|
||||
|
||||
/* Attributes */
|
||||
|
||||
typedef long long TypeDesc;
|
||||
|
||||
template<typename T>
|
||||
ccl_device_inline bool set_attribute(const T v,
|
||||
const T dx,
|
||||
const T dy,
|
||||
const TypeDesc type,
|
||||
bool derivatives,
|
||||
ccl_private void *val);
|
||||
|
||||
/* Device-side helper: store a float result into `val`, viewed as a float array. Slot 0 receives
 * `v`; slots 1 and 2 receive `dx` and `dy` only when `derivatives` is true.
 * NOTE(review): the added call sites in this diff pass (value, derivatives, val) only, so this
 * five-argument form looks like the removed side of the change; confirm against the repository. */
ccl_device_inline void set_data_float(
|
||||
const float v, const float dx, const float dy, bool derivatives, ccl_private void *val)
|
||||
{
|
||||
ccl_private float *fval = static_cast<ccl_private float *>(val);
|
||||
fval[0] = v;
|
||||
if (derivatives) {
|
||||
fval[1] = dx;
|
||||
fval[2] = dy;
|
||||
}
|
||||
}
|
||||
|
||||
/* Device-side helper: store a float3 result into `val`, viewed as a float array. Slots 0-2
 * receive `v.x/y/z`; when `derivatives` is true, slots 3-5 receive `dx` and slots 6-8 `dy`.
 * NOTE(review): the added call sites in this diff pass (value, derivatives, val) only, so this
 * five-argument form looks like the removed side of the change; confirm against the repository. */
ccl_device_inline void set_data_float3(
|
||||
const float3 v, const float3 dx, const float3 dy, bool derivatives, ccl_private void *val)
|
||||
{
|
||||
ccl_private float *fval = static_cast<ccl_private float *>(val);
|
||||
fval[0] = v.x;
|
||||
fval[1] = v.y;
|
||||
fval[2] = v.z;
|
||||
if (derivatives) {
|
||||
fval[3] = dx.x;
|
||||
fval[4] = dx.y;
|
||||
fval[5] = dx.z;
|
||||
fval[6] = dy.x;
|
||||
fval[7] = dy.y;
|
||||
fval[8] = dy.z;
|
||||
}
|
||||
}
|
||||
|
||||
/* Device-side helper: store a float4 result into `val`, viewed as a float array. Slots 0-3
 * receive `v.x/y/z/w`; when `derivatives` is true, slots 4-7 receive `dx` and slots 8-11 `dy`.
 * NOTE(review): the added call sites in this diff pass (value, derivatives, val) only, so this
 * five-argument form looks like the removed side of the change; confirm against the repository. */
ccl_device_inline void set_data_float4(
|
||||
const float4 v, const float4 dx, const float4 dy, bool derivatives, ccl_private void *val)
|
||||
{
|
||||
ccl_private float *fval = static_cast<ccl_private float *>(val);
|
||||
fval[0] = v.x;
|
||||
fval[1] = v.y;
|
||||
fval[2] = v.z;
|
||||
fval[3] = v.w;
|
||||
if (derivatives) {
|
||||
fval[4] = dx.x;
|
||||
fval[5] = dx.y;
|
||||
fval[6] = dx.z;
|
||||
fval[7] = dx.w;
|
||||
fval[8] = dy.x;
|
||||
fval[9] = dy.y;
|
||||
fval[10] = dy.z;
|
||||
fval[11] = dy.w;
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_template_spec bool set_attribute(const float v,
|
||||
const float dx,
|
||||
const float dy,
|
||||
ccl_device_template_spec bool set_attribute(const dual1 v,
|
||||
const TypeDesc type,
|
||||
bool derivatives,
|
||||
ccl_private void *val)
|
||||
@@ -497,29 +437,23 @@ ccl_device_template_spec bool set_attribute(const float v,
|
||||
if (type_basetype == 11 /* TypeDesc::FLOAT */) {
|
||||
if ((type_aggregate == 3 /* TypeDesc::VEC3 */) || (type_aggregate == 1 && type_arraylen == 3))
|
||||
{
|
||||
set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val);
|
||||
set_data_float3(make_float3(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if ((type_aggregate == 4 /* TypeDesc::VEC4 */) || (type_aggregate == 1 && type_arraylen == 4))
|
||||
{
|
||||
set_data_float4(make_float4(v, v, v, 1.0f),
|
||||
make_float4(dx, dx, dx, 0.0f),
|
||||
make_float4(dy, dy, dy, 0.0f),
|
||||
derivatives,
|
||||
val);
|
||||
set_data_float4(make_float4(make_float3(v)), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) {
|
||||
set_data_float(v, dx, dy, derivatives, val);
|
||||
set_data_float(v, derivatives, val);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
ccl_device_template_spec bool set_attribute(const float2 v,
|
||||
const float2 dx,
|
||||
const float2 dy,
|
||||
ccl_device_template_spec bool set_attribute(const dual2 v,
|
||||
const TypeDesc type,
|
||||
bool derivatives,
|
||||
ccl_private void *val)
|
||||
@@ -531,28 +465,22 @@ ccl_device_template_spec bool set_attribute(const float2 v,
|
||||
if (type_basetype == 11 /* TypeDesc::FLOAT */) {
|
||||
if ((type_aggregate == 3 /* TypeDesc::VEC3 */) || (type_aggregate == 1 && type_arraylen == 3))
|
||||
{
|
||||
set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val);
|
||||
set_data_float3(make_float3(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
/* Four-component float target: a VEC4 aggregate, or a scalar array of length 4.
 * The first set_data_float4 call (five lines) is the removed form, the single-line call after it
 * is the added form; this rendered diff carries no +/- markers. */
if ((type_aggregate == 4 /* TypeDesc::VEC4 */) || (type_aggregate == 1 && type_arraylen == 4))
|
||||
{
|
||||
set_data_float4(make_float4(v.x, v.y, 0.0f, 1.0f),
|
||||
make_float4(dx.x, dx.y, 0.0f, 0.0f),
|
||||
make_float4(dy.x, dy.y, 0.0f, 0.0f),
|
||||
derivatives,
|
||||
val);
|
||||
set_data_float4(make_float4(make_float3(v)), derivatives, val);
|
||||
/* Report success here, as the three-component and scalar branches do. Without this return a
 * float[4] target (aggregate 1, arraylen 4) also satisfies the scalar test that follows and has
 * its leading slots overwritten with the averaged value, while a VEC4 target falls through to
 * `return false` even though the data was already written. */
return true;
|
||||
}
|
||||
if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) {
|
||||
set_data_float(average(v), average(dx), average(dy), derivatives, val);
|
||||
set_data_float(average(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
ccl_device_template_spec bool set_attribute(const float3 v,
|
||||
const float3 dx,
|
||||
const float3 dy,
|
||||
ccl_device_template_spec bool set_attribute(const dual3 v,
|
||||
const TypeDesc type,
|
||||
bool derivatives,
|
||||
ccl_private void *val)
|
||||
@@ -564,26 +492,23 @@ ccl_device_template_spec bool set_attribute(const float3 v,
|
||||
if (type_basetype == 11 /* TypeDesc::FLOAT */) {
|
||||
if ((type_aggregate == 3 /* TypeDesc::VEC3 */) || (type_aggregate == 1 && type_arraylen == 3))
|
||||
{
|
||||
set_data_float3(v, dx, dy, derivatives, val);
|
||||
set_data_float3(v, derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if ((type_aggregate == 4 /* TypeDesc::VEC4 */) || (type_aggregate == 1 && type_arraylen == 4))
|
||||
{
|
||||
set_data_float4(
|
||||
make_float4(v, 1.0f), make_float4(dx, 0.0f), make_float4(dy, 0.0f), derivatives, val);
|
||||
set_data_float4(make_float4(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) {
|
||||
set_data_float(average(v), average(dx), average(dy), derivatives, val);
|
||||
set_data_float(average(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
ccl_device_template_spec bool set_attribute(const float4 v,
|
||||
const float4 dx,
|
||||
const float4 dy,
|
||||
ccl_device_template_spec bool set_attribute(const dual4 v,
|
||||
const TypeDesc type,
|
||||
bool derivatives,
|
||||
ccl_private void *val)
|
||||
@@ -595,20 +520,16 @@ ccl_device_template_spec bool set_attribute(const float4 v,
|
||||
if (type_basetype == 11 /* TypeDesc::FLOAT */) {
|
||||
if ((type_aggregate == 3 /* TypeDesc::VEC3 */) || (type_aggregate == 1 && type_arraylen == 3))
|
||||
{
|
||||
set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val);
|
||||
set_data_float3(make_float3(v), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if ((type_aggregate == 4 /* TypeDesc::VEC4 */) || (type_aggregate == 1 && type_arraylen == 4))
|
||||
{
|
||||
set_data_float4(v, dx, dy, derivatives, val);
|
||||
set_data_float4(v, derivatives, val);
|
||||
return true;
|
||||
}
|
||||
if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) {
|
||||
set_data_float(average(make_float3(v)),
|
||||
average(make_float3(dx)),
|
||||
average(make_float3(dy)),
|
||||
derivatives,
|
||||
val);
|
||||
set_data_float(average(make_float3(v)), derivatives, val);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -622,7 +543,7 @@ ccl_device_inline bool set_attribute(const T f,
|
||||
bool derivatives,
|
||||
ccl_private void *val)
|
||||
{
|
||||
return set_attribute(f, make_zero<T>(), make_zero<T>(), type, derivatives, val);
|
||||
return set_attribute(dual<T>(f), type, derivatives, val);
|
||||
}
|
||||
|
||||
ccl_device_inline bool set_attribute_matrix(const ccl_private Transform &tfm,
|
||||
@@ -730,29 +651,24 @@ ccl_device_inline bool get_background_attribute(KernelGlobals kg,
|
||||
|
||||
else if (name == DeviceStrings::u_ndc) {
|
||||
/* NDC coordinates with special exception for orthographic projection. */
|
||||
float3 ndc[3];
|
||||
dual3 ndc;
|
||||
|
||||
if ((sg->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE &&
|
||||
kernel_data.cam.type == CAMERA_ORTHOGRAPHIC)
|
||||
{
|
||||
ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P);
|
||||
|
||||
if (derivatives) {
|
||||
ndc[1] = zero_float3();
|
||||
ndc[2] = zero_float3();
|
||||
}
|
||||
ndc.val = camera_world_to_ndc(kg, sd, sd->ray_P);
|
||||
}
|
||||
else {
|
||||
ndc[0] = camera_world_to_ndc(kg, sd, sd->P);
|
||||
ndc.val = camera_world_to_ndc(kg, sd, sd->P);
|
||||
|
||||
if (derivatives) {
|
||||
const differential3 dP = differential_from_compact(sd->Ng, sd->dP);
|
||||
ndc[1] = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc[0];
|
||||
ndc[2] = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc[0];
|
||||
ndc.dx = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc.val;
|
||||
ndc.dy = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc.val;
|
||||
}
|
||||
}
|
||||
|
||||
return set_attribute(ndc[0], ndc[1], ndc[2], type, derivatives, val);
|
||||
return set_attribute(ndc, type, derivatives, val);
|
||||
}
|
||||
|
||||
return false;
|
||||
@@ -766,20 +682,17 @@ ccl_device_inline bool get_object_attribute_impl(KernelGlobals kg,
|
||||
bool derivatives,
|
||||
ccl_private void *val)
|
||||
{
|
||||
T v;
|
||||
T dx = make_zero<T>();
|
||||
T dy = make_zero<T>();
|
||||
dual<T> data;
|
||||
#ifdef __VOLUME__
|
||||
if (primitive_is_volume_attribute(sd)) {
|
||||
v = primitive_volume_attribute<T>(kg, sd, desc, true);
|
||||
data.val = primitive_volume_attribute<T>(kg, sd, desc, true);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
v = primitive_surface_attribute<T>(
|
||||
kg, sd, desc, derivatives ? &dx : nullptr, derivatives ? &dy : nullptr);
|
||||
data = primitive_surface_attribute<T>(kg, sd, desc, derivatives, derivatives);
|
||||
}
|
||||
return set_attribute(v, dx, dy, type, derivatives, val);
|
||||
return set_attribute(data, type, derivatives, val);
|
||||
}
|
||||
|
||||
ccl_device_inline bool get_object_attribute(KernelGlobals kg,
|
||||
@@ -974,11 +887,11 @@ ccl_device_inline bool get_object_standard_attribute(KernelGlobals kg,
|
||||
}
|
||||
}
|
||||
if (name == DeviceStrings::u_bump_map_normal) {
|
||||
float3 f[3];
|
||||
dual3 f;
|
||||
if (!attribute_bump_map_normal(kg, sd, f)) {
|
||||
return false;
|
||||
}
|
||||
return set_attribute(f[0], f[1], f[2], type, derivatives, val);
|
||||
return set_attribute(f, type, derivatives, val);
|
||||
}
|
||||
|
||||
return get_background_attribute(kg, sg, sd, name, type, derivatives, val);
|
||||
|
||||
@@ -11,11 +11,49 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
/* TODO: deduplicate function `set_attribute_float3()` in CPU and GPU. */
|
||||
#ifdef __KERNEL_OPTIX__
|
||||
typedef long long TypeDesc;
|
||||
#endif
|
||||
|
||||
template<typename T>
|
||||
ccl_device_inline bool set_attribute(const dual<T> v,
|
||||
const TypeDesc type,
|
||||
bool derivatives,
|
||||
ccl_private void *val);
|
||||
|
||||
ccl_device_inline void set_data_float(const dual1 data, bool derivatives, ccl_private void *val)
|
||||
{
|
||||
ccl_private float *fval = static_cast<ccl_private float *>(val);
|
||||
fval[0] = data.val;
|
||||
if (derivatives) {
|
||||
fval[1] = data.dx;
|
||||
fval[2] = data.dy;
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline void set_data_float3(const dual3 data, bool derivatives, ccl_private void *val)
|
||||
{
|
||||
ccl_private float *fval = static_cast<ccl_private float *>(val);
|
||||
copy_v3_v3(fval, data.val);
|
||||
if (derivatives) {
|
||||
copy_v3_v3(fval + 3, data.dx);
|
||||
copy_v3_v3(fval + 6, data.dy);
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device_inline void set_data_float4(const dual4 data, bool derivatives, ccl_private void *val)
|
||||
{
|
||||
ccl_private float *fval = static_cast<ccl_private float *>(val);
|
||||
copy_v4_v4(fval, data.val);
|
||||
if (derivatives) {
|
||||
copy_v4_v4(fval + 4, data.dx);
|
||||
copy_v4_v4(fval + 8, data.dy);
|
||||
}
|
||||
}
|
||||
|
||||
ccl_device bool attribute_bump_map_normal(KernelGlobals kg,
|
||||
ccl_private const ShaderData *sd,
|
||||
float3 f[3])
|
||||
ccl_private dual3 &f)
|
||||
{
|
||||
if (!(sd->type & PRIMITIVE_TRIANGLE) || !(sd->shader & SHADER_SMOOTH_NORMAL)) {
|
||||
/* TODO: implement for curve. */
|
||||
@@ -29,29 +67,27 @@ ccl_device bool attribute_bump_map_normal(KernelGlobals kg,
|
||||
object_inverse_normal_transform(kg, sd, &Ng);
|
||||
|
||||
if (sd->type == PRIMITIVE_TRIANGLE) {
|
||||
f[0] = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v, sd->du, sd->dv, f[1], f[2]);
|
||||
f.val = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v, sd->du, sd->dv, f.dx, f.dy);
|
||||
}
|
||||
else {
|
||||
assert(sd->type & PRIMITIVE_MOTION_TRIANGLE);
|
||||
f[0] = motion_triangle_smooth_normal(
|
||||
kg, Ng, sd->object, sd->prim, sd->time, sd->u, sd->v, sd->du, sd->dv, f[1], f[2]);
|
||||
f.val = motion_triangle_smooth_normal(
|
||||
kg, Ng, sd->object, sd->prim, sd->time, sd->u, sd->v, sd->du, sd->dv, f.dx, f.dy);
|
||||
}
|
||||
|
||||
if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) {
|
||||
/* Transform to local space. */
|
||||
object_inverse_normal_transform(kg, sd, f);
|
||||
object_inverse_normal_transform(kg, sd, f + 1);
|
||||
object_inverse_normal_transform(kg, sd, f + 2);
|
||||
object_inverse_normal_transform(kg, sd, &f.val);
|
||||
object_inverse_normal_transform(kg, sd, &f.dx);
|
||||
object_inverse_normal_transform(kg, sd, &f.dy);
|
||||
}
|
||||
|
||||
if (backfacing) {
|
||||
f[0] = -f[0];
|
||||
f[1] = -f[1];
|
||||
f[2] = -f[2];
|
||||
f = -f;
|
||||
}
|
||||
|
||||
f[1] -= f[0];
|
||||
f[2] -= f[0];
|
||||
f.dx -= f.val;
|
||||
f.dy -= f.val;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -122,7 +122,7 @@ ccl_device_inline void svm_surface_attr(KernelGlobals kg,
|
||||
ccl_private float *stack,
|
||||
const uint out_offset)
|
||||
{
|
||||
T f = primitive_surface_attribute<T>(kg, sd, desc, nullptr, nullptr);
|
||||
T f = primitive_surface_attribute<T>(kg, sd, desc).val;
|
||||
svm_node_attr_store(type, stack, out_offset, f);
|
||||
}
|
||||
|
||||
@@ -135,10 +135,9 @@ ccl_device_inline void svm_surface_attr_dx(KernelGlobals kg,
|
||||
ccl_private float *stack,
|
||||
const uint out_offset)
|
||||
{
|
||||
T dfdx;
|
||||
T f = primitive_surface_attribute<T>(kg, sd, desc, &dfdx, nullptr);
|
||||
f += dfdx * bump_filter_width;
|
||||
svm_node_attr_store(type, stack, out_offset, f);
|
||||
dual<T> f = primitive_surface_attribute<T>(kg, sd, desc, true, false);
|
||||
f.val += f.dx * bump_filter_width;
|
||||
svm_node_attr_store(type, stack, out_offset, f.val);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
@@ -150,10 +149,9 @@ ccl_device_inline void svm_surface_attr_dy(KernelGlobals kg,
|
||||
ccl_private float *stack,
|
||||
const uint out_offset)
|
||||
{
|
||||
T dfdy;
|
||||
T f = primitive_surface_attribute<T>(kg, sd, desc, nullptr, &dfdy);
|
||||
f += dfdy * bump_filter_width;
|
||||
svm_node_attr_store(type, stack, out_offset, f);
|
||||
dual<T> f = primitive_surface_attribute<T>(kg, sd, desc, false, true);
|
||||
f.val += f.dy * bump_filter_width;
|
||||
svm_node_attr_store(type, stack, out_offset, f.val);
|
||||
}
|
||||
|
||||
template<uint node_feature_mask>
|
||||
|
||||
@@ -31,19 +31,16 @@ ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg,
|
||||
const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED);
|
||||
|
||||
if (desc.offset != ATTR_STD_NOT_FOUND) {
|
||||
differential3 dP;
|
||||
float3 P = primitive_surface_attribute<float3>(kg, sd, desc, &dP.dx, &dP.dy);
|
||||
dual3 P = primitive_surface_attribute<float3>(kg, sd, desc, true, true);
|
||||
|
||||
object_position_transform(kg, sd, &P);
|
||||
object_dir_transform(kg, sd, &dP.dx);
|
||||
object_dir_transform(kg, sd, &dP.dy);
|
||||
|
||||
sd->P = P;
|
||||
sd->dP = differential_make_compact(dP);
|
||||
sd->P = P.val;
|
||||
sd->dP = differential_make_compact(P);
|
||||
|
||||
/* Save the full differential, the compact form isn't enough for svm_node_set_bump. */
|
||||
stack_store_float3(stack, offset + 4, dP.dx);
|
||||
stack_store_float3(stack, offset + 7, dP.dy);
|
||||
stack_store_float3(stack, offset + 4, P.dx);
|
||||
stack_store_float3(stack, offset + 7, P.dy);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -845,7 +845,7 @@ ccl_device
|
||||
const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node2.y);
|
||||
float random = 0.0f;
|
||||
if (attr_descr_random.offset != ATTR_STD_NOT_FOUND) {
|
||||
random = primitive_surface_attribute<float>(kg, sd, attr_descr_random, nullptr, nullptr);
|
||||
random = primitive_surface_attribute<float>(kg, sd, attr_descr_random).val;
|
||||
}
|
||||
else {
|
||||
random = stack_load_float_default(stack, random_ofs, data_node3.y);
|
||||
@@ -977,7 +977,7 @@ ccl_device
|
||||
if (bsdf->aspect_ratio != 1.0f) {
|
||||
/* Align ellipse major axis with the curve normal direction. */
|
||||
const AttributeDescriptor attr_descr_normal = find_attribute(kg, sd, shared_ofs2);
|
||||
bsdf->N = curve_attribute<float3>(kg, sd, attr_descr_normal, nullptr, nullptr);
|
||||
bsdf->N = curve_attribute<float3>(kg, sd, attr_descr_normal).val;
|
||||
}
|
||||
|
||||
bsdf->roughness = roughness;
|
||||
|
||||
@@ -206,7 +206,7 @@ ccl_device_noinline int svm_node_vector_displacement(KernelGlobals kg,
|
||||
const AttributeDescriptor attr = find_attribute(kg, sd, node.z);
|
||||
float3 tangent;
|
||||
if (attr.offset != ATTR_STD_NOT_FOUND) {
|
||||
tangent = primitive_surface_attribute<float3>(kg, sd, attr, nullptr, nullptr);
|
||||
tangent = primitive_surface_attribute<float3>(kg, sd, attr).val;
|
||||
}
|
||||
else {
|
||||
tangent = normalize(sd->dPdu);
|
||||
@@ -215,7 +215,7 @@ ccl_device_noinline int svm_node_vector_displacement(KernelGlobals kg,
|
||||
float3 bitangent = safe_normalize(cross(normal, tangent));
|
||||
const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w);
|
||||
if (attr_sign.offset != ATTR_STD_NOT_FOUND) {
|
||||
const float sign = primitive_surface_attribute<float>(kg, sd, attr_sign, nullptr, nullptr);
|
||||
const float sign = primitive_surface_attribute<float>(kg, sd, attr_sign).val;
|
||||
bitangent *= sign;
|
||||
}
|
||||
|
||||
|
||||
@@ -356,8 +356,8 @@ ccl_device_noinline void svm_node_normal_map(KernelGlobals kg,
|
||||
}
|
||||
|
||||
/* get _unnormalized_ interpolated normal and tangent */
|
||||
const float3 tangent = primitive_surface_attribute<float3>(kg, sd, attr, nullptr, nullptr);
|
||||
const float sign = primitive_surface_attribute<float>(kg, sd, attr_sign, nullptr, nullptr);
|
||||
const float3 tangent = primitive_surface_attribute<float3>(kg, sd, attr).val;
|
||||
const float sign = primitive_surface_attribute<float>(kg, sd, attr_sign).val;
|
||||
float3 normal;
|
||||
|
||||
if (sd->shader & SHADER_SMOOTH_NORMAL) {
|
||||
@@ -441,13 +441,13 @@ ccl_device_noinline void svm_node_tangent(KernelGlobals kg,
|
||||
const AttributeDescriptor desc = find_attribute(kg, sd, node.z);
|
||||
if (desc.offset != ATTR_STD_NOT_FOUND) {
|
||||
if (desc.type == NODE_ATTR_FLOAT2) {
|
||||
const float2 value = primitive_surface_attribute<float2>(kg, sd, desc, nullptr, nullptr);
|
||||
const float2 value = primitive_surface_attribute<float2>(kg, sd, desc).val;
|
||||
attribute_value.x = value.x;
|
||||
attribute_value.y = value.y;
|
||||
attribute_value.z = 0.0f;
|
||||
}
|
||||
else {
|
||||
attribute_value = primitive_surface_attribute<float3>(kg, sd, desc, nullptr, nullptr);
|
||||
attribute_value = primitive_surface_attribute<float3>(kg, sd, desc).val;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,11 +24,7 @@ ccl_device_inline float3 stack_load_float3(const ccl_private float *stack, const
|
||||
ccl_device_inline void stack_store_float3(ccl_private float *stack, const uint a, const float3 f)
|
||||
{
|
||||
kernel_assert(a + 2 < SVM_STACK_SIZE);
|
||||
|
||||
ccl_private float *stack_a = stack + a;
|
||||
stack_a[0] = f.x;
|
||||
stack_a[1] = f.y;
|
||||
stack_a[2] = f.z;
|
||||
copy_v3_v3(stack + a, f);
|
||||
}
|
||||
|
||||
ccl_device_inline float stack_load_float(const ccl_private float *stack, const uint a)
|
||||
|
||||
@@ -24,14 +24,12 @@ ccl_device_noinline void svm_node_vertex_color(KernelGlobals kg,
|
||||
const AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
|
||||
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
|
||||
if (descriptor.type == NODE_ATTR_FLOAT4 || descriptor.type == NODE_ATTR_RGBA) {
|
||||
const float4 vertex_color = primitive_surface_attribute<float4>(
|
||||
kg, sd, descriptor, nullptr, nullptr);
|
||||
const float4 vertex_color = primitive_surface_attribute<float4>(kg, sd, descriptor).val;
|
||||
stack_store_float3(stack, color_offset, make_float3(vertex_color));
|
||||
stack_store_float(stack, alpha_offset, vertex_color.w);
|
||||
}
|
||||
else {
|
||||
const float3 vertex_color = primitive_surface_attribute<float3>(
|
||||
kg, sd, descriptor, nullptr, nullptr);
|
||||
const float3 vertex_color = primitive_surface_attribute<float3>(kg, sd, descriptor).val;
|
||||
stack_store_float3(stack, color_offset, vertex_color);
|
||||
stack_store_float(stack, alpha_offset, 1.0f);
|
||||
}
|
||||
@@ -56,19 +54,15 @@ ccl_device_noinline void svm_node_vertex_color_bump_dx(KernelGlobals kg,
|
||||
const AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
|
||||
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
|
||||
if (descriptor.type == NODE_ATTR_FLOAT4 || descriptor.type == NODE_ATTR_RGBA) {
|
||||
float4 dfdx;
|
||||
float4 vertex_color = primitive_surface_attribute<float4>(
|
||||
kg, sd, descriptor, &dfdx, nullptr);
|
||||
vertex_color += dfdx * bump_filter_width;
|
||||
stack_store_float3(stack, color_offset, make_float3(vertex_color));
|
||||
stack_store_float(stack, alpha_offset, vertex_color.w);
|
||||
dual4 vertex_color = primitive_surface_attribute<float4>(kg, sd, descriptor, true, false);
|
||||
vertex_color.val += vertex_color.dx * bump_filter_width;
|
||||
stack_store_float3(stack, color_offset, make_float3(vertex_color.val));
|
||||
stack_store_float(stack, alpha_offset, vertex_color.val.w);
|
||||
}
|
||||
else {
|
||||
float3 dfdx;
|
||||
float3 vertex_color = primitive_surface_attribute<float3>(
|
||||
kg, sd, descriptor, &dfdx, nullptr);
|
||||
vertex_color += dfdx * bump_filter_width;
|
||||
stack_store_float3(stack, color_offset, vertex_color);
|
||||
dual3 vertex_color = primitive_surface_attribute<float3>(kg, sd, descriptor, true, false);
|
||||
vertex_color.val += vertex_color.dx * bump_filter_width;
|
||||
stack_store_float3(stack, color_offset, vertex_color.val);
|
||||
stack_store_float(stack, alpha_offset, 1.0f);
|
||||
}
|
||||
}
|
||||
@@ -92,19 +86,15 @@ ccl_device_noinline void svm_node_vertex_color_bump_dy(KernelGlobals kg,
|
||||
const AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id);
|
||||
if (descriptor.offset != ATTR_STD_NOT_FOUND) {
|
||||
if (descriptor.type == NODE_ATTR_FLOAT4 || descriptor.type == NODE_ATTR_RGBA) {
|
||||
float4 dfdy;
|
||||
float4 vertex_color = primitive_surface_attribute<float4>(
|
||||
kg, sd, descriptor, nullptr, &dfdy);
|
||||
vertex_color += dfdy * bump_filter_width;
|
||||
stack_store_float3(stack, color_offset, make_float3(vertex_color));
|
||||
stack_store_float(stack, alpha_offset, vertex_color.w);
|
||||
dual4 vertex_color = primitive_surface_attribute<float4>(kg, sd, descriptor, false, true);
|
||||
vertex_color.val += vertex_color.dy * bump_filter_width;
|
||||
stack_store_float3(stack, color_offset, make_float3(vertex_color.val));
|
||||
stack_store_float(stack, alpha_offset, vertex_color.val.w);
|
||||
}
|
||||
else {
|
||||
float3 dfdy;
|
||||
float3 vertex_color = primitive_surface_attribute<float3>(
|
||||
kg, sd, descriptor, nullptr, &dfdy);
|
||||
vertex_color += dfdy * bump_filter_width;
|
||||
stack_store_float3(stack, color_offset, vertex_color);
|
||||
dual3 vertex_color = primitive_surface_attribute<float3>(kg, sd, descriptor, false, true);
|
||||
vertex_color.val += vertex_color.dy * bump_filter_width;
|
||||
stack_store_float3(stack, color_offset, vertex_color.val);
|
||||
stack_store_float(stack, alpha_offset, 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,6 +125,11 @@ ccl_device_forceinline float differential_make_compact(const differential3 dD)
|
||||
return 0.5f * (len(dD.dx) + len(dD.dy));
|
||||
}
|
||||
|
||||
ccl_device_forceinline float differential_make_compact(const dual3 D)
|
||||
{
|
||||
return 0.5f * (len(D.dx) + len(D.dy));
|
||||
}
|
||||
|
||||
ccl_device_forceinline float differential_incoming_compact(const float dD)
|
||||
{
|
||||
return dD;
|
||||
|
||||
@@ -75,6 +75,7 @@ set(SRC_HEADERS
|
||||
math_int3.h
|
||||
math_int4.h
|
||||
math_int8.h
|
||||
math_dual.h
|
||||
md5.h
|
||||
murmurhash.h
|
||||
nanovdb.h
|
||||
@@ -121,6 +122,7 @@ set(SRC_HEADERS
|
||||
types_uint3.h
|
||||
types_uint4.h
|
||||
types_ushort4.h
|
||||
types_dual.h
|
||||
unique_ptr.h
|
||||
unique_ptr_vector.h
|
||||
vector.h
|
||||
|
||||
@@ -19,4 +19,6 @@
|
||||
|
||||
#include "util/math_float3.h" // IWYU pragma: export
|
||||
|
||||
#include "util/math_dual.h" // IWYU pragma: export
|
||||
|
||||
#include "util/rect.h" // IWYU pragma: export
|
||||
|
||||
59
intern/cycles/util/math_dual.h
Normal file
59
intern/cycles/util/math_dual.h
Normal file
@@ -0,0 +1,59 @@
|
||||
/* SPDX-FileCopyrightText: 2025 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/math_base.h"
|
||||
#include "util/types_dual.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
ccl_device_template_spec dual1 make_zero()
|
||||
{
|
||||
return dual1();
|
||||
}
|
||||
|
||||
ccl_device_template_spec dual2 make_zero()
|
||||
{
|
||||
return dual2();
|
||||
}
|
||||
|
||||
ccl_device_template_spec dual3 make_zero()
|
||||
{
|
||||
return dual3();
|
||||
}
|
||||
|
||||
ccl_device_template_spec dual4 make_zero()
|
||||
{
|
||||
return dual4();
|
||||
}
|
||||
|
||||
/* Multiplication of dual by scalar. */
|
||||
template<class T1, class T2> ccl_device_inline dual<T1> operator*(const dual<T1> a, T2 b)
|
||||
{
|
||||
return {a.val * b, a.dx * b, a.dy * b};
|
||||
}
|
||||
|
||||
/* Negation. */
|
||||
template<class T> ccl_device_inline dual<T> operator-(const ccl_private dual<T> &a)
|
||||
{
|
||||
return {-a.val, -a.dx, -a.dy};
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline dual1 average(const dual<T> a)
|
||||
{
|
||||
return {average(a.val), average(a.dx), average(a.dy)};
|
||||
}
|
||||
|
||||
template<class T> ccl_device_inline dual1 reduce_add(const dual<T> a)
|
||||
{
|
||||
return {reduce_add(a.val), reduce_add(a.dx), reduce_add(a.dy)};
|
||||
}
|
||||
|
||||
template<class T1, class T2> ccl_device_inline dual1 dot(const dual<T1> a, const T2 b)
|
||||
{
|
||||
return reduce_add(a * b);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
@@ -749,4 +749,11 @@ ccl_device_inline float2 map_to_sphere(const float3 co)
|
||||
return make_float2(u, v);
|
||||
}
|
||||
|
||||
ccl_device_inline void copy_v3_v3(ccl_private float *r, const float3 val)
|
||||
{
|
||||
r[0] = val.x;
|
||||
r[1] = val.y;
|
||||
r[2] = val.z;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -655,4 +655,12 @@ ccl_device_inline float4 __int4_as_float4(const int4 i)
|
||||
}
|
||||
#endif /* !defined(__KERNEL_METAL__) && !defined(__KERNEL_ONEAPI__) */
|
||||
|
||||
ccl_device_inline void copy_v4_v4(ccl_private float *r, const float4 val)
|
||||
{
|
||||
r[0] = val.x;
|
||||
r[1] = val.y;
|
||||
r[2] = val.z;
|
||||
r[3] = val.w;
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -55,32 +55,72 @@ ccl_device_inline float3 transform_point(const ccl_global Transform *t, const fl
|
||||
|
||||
ccl_device_inline float3 transform_point(const ccl_private Transform *t, const float3 a)
|
||||
{
|
||||
/* TODO(sergey): Disabled for now, causes crashes in certain cases. */
|
||||
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
|
||||
const float4 aa(a.m128);
|
||||
|
||||
float4 x(_mm_loadu_ps(&t->x.x));
|
||||
float4 y(_mm_loadu_ps(&t->y.x));
|
||||
float4 z(_mm_loadu_ps(&t->z.x));
|
||||
float4 x = t->x;
|
||||
float4 y = t->y;
|
||||
float4 z = t->z;
|
||||
float4 w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f));
|
||||
|
||||
_MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128);
|
||||
|
||||
float4 tmp = w;
|
||||
tmp = madd(shuffle<2>(aa), z, tmp);
|
||||
tmp = madd(shuffle<1>(aa), y, tmp);
|
||||
tmp = madd(shuffle<0>(aa), x, tmp);
|
||||
|
||||
return float3(tmp.m128);
|
||||
#elif defined(__KERNEL_METAL__)
|
||||
const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t);
|
||||
return (a * b).xyz + make_float3(t->x.w, t->y.w, t->z.w);
|
||||
#else
|
||||
float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w,
|
||||
a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w,
|
||||
a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w);
|
||||
const float4 a_ = make_homogeneous(a);
|
||||
return make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z));
|
||||
#endif
|
||||
}
|
||||
|
||||
return c;
|
||||
ccl_device_inline dual3 transform_point(const ccl_private Transform *t, const dual3 a)
|
||||
{
|
||||
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
|
||||
/* NOTE: `dot()` has large latency on Intel platforms, the following method of transpose + madd
|
||||
* is faster. However, we did not measure on Neon platforms; it might be that `dot()` is fine
|
||||
* there, and we can use the simpler implementation at the end of the function. */
|
||||
float4 x = t->x;
|
||||
float4 y = t->y;
|
||||
float4 z = t->z;
|
||||
float4 w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f));
|
||||
_MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128);
|
||||
|
||||
float4 tmp = w;
|
||||
dual3 result;
|
||||
{
|
||||
const float4 aa(a.val.m128);
|
||||
tmp = madd(shuffle<2>(aa), z, tmp);
|
||||
tmp = madd(shuffle<1>(aa), y, tmp);
|
||||
tmp = madd(shuffle<0>(aa), x, tmp);
|
||||
result.val = float3(tmp.m128);
|
||||
}
|
||||
|
||||
{
|
||||
const float4 dx(a.dx.m128);
|
||||
tmp = shuffle<2>(dx) * z;
|
||||
tmp = madd(shuffle<1>(dx), y, tmp);
|
||||
tmp = madd(shuffle<0>(dx), x, tmp);
|
||||
result.dx = float3(tmp.m128);
|
||||
}
|
||||
|
||||
{
|
||||
const float4 dy(a.dy.m128);
|
||||
tmp = shuffle<2>(dy) * z;
|
||||
tmp = madd(shuffle<1>(dy), y, tmp);
|
||||
tmp = madd(shuffle<0>(dy), x, tmp);
|
||||
result.dy = float3(tmp.m128);
|
||||
}
|
||||
|
||||
return result;
|
||||
#elif defined(__KERNEL_METAL__)
|
||||
const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t);
|
||||
return {(a.val * b).xyz + make_float3(t->x.w, t->y.w, t->z.w), (a.dx * b).xyz, (a.dy * b).xyz};
|
||||
#else
|
||||
const dual4 a_ = make_homogeneous(a);
|
||||
return make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z));
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -88,28 +128,21 @@ ccl_device_inline float3 transform_direction(const ccl_private Transform *t, con
|
||||
{
|
||||
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
|
||||
const float4 aa(a.m128);
|
||||
|
||||
float4 x(_mm_loadu_ps(&t->x.x));
|
||||
float4 y(_mm_loadu_ps(&t->y.x));
|
||||
float4 z(_mm_loadu_ps(&t->z.x));
|
||||
float4 x = t->x;
|
||||
float4 y = t->y;
|
||||
float4 z = t->z;
|
||||
float4 w(_mm_setzero_ps());
|
||||
|
||||
_MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128);
|
||||
|
||||
float4 tmp = shuffle<2>(aa) * z;
|
||||
tmp = madd(shuffle<1>(aa), y, tmp);
|
||||
tmp = madd(shuffle<0>(aa), x, tmp);
|
||||
|
||||
return float3(tmp.m128);
|
||||
#elif defined(__KERNEL_METAL__)
|
||||
const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t);
|
||||
return (a * b).xyz;
|
||||
#else
|
||||
float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z,
|
||||
a.x * t->y.x + a.y * t->y.y + a.z * t->y.z,
|
||||
a.x * t->z.x + a.y * t->z.y + a.z * t->z.z);
|
||||
|
||||
return c;
|
||||
const float4 a_ = make_float4(a, 0.0f);
|
||||
return make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -27,3 +27,5 @@
|
||||
#include "util/types_float8.h" // IWYU pragma: export
|
||||
|
||||
#include "util/types_spectrum.h" // IWYU pragma: export
|
||||
|
||||
#include "util/types_dual.h" // IWYU pragma: export
|
||||
|
||||
136
intern/cycles/util/types_dual.h
Normal file
136
intern/cycles/util/types_dual.h
Normal file
@@ -0,0 +1,136 @@
|
||||
/* SPDX-FileCopyrightText: 2025 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "util/types_float2.h"
|
||||
#include "util/types_float3.h"
|
||||
#include "util/types_float4.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
template<class T> struct dual {
|
||||
T val, dx, dy;
|
||||
dual<T>() = default;
|
||||
ccl_device_inline_method explicit dual<T>(const T val) : val(val) {}
|
||||
ccl_device_inline_method dual<T>(const T val, const T dx, const T dy) : val(val), dx(dx), dy(dy)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct dual<float2> {
|
||||
float2 val = make_float2(0.0f);
|
||||
float2 dx = make_float2(0.0f);
|
||||
float2 dy = make_float2(0.0f);
|
||||
dual<float2>() = default;
|
||||
ccl_device_inline_method explicit dual<float2>(const float2 val) : val(val) {}
|
||||
ccl_device_inline_method dual<float2>(const float2 val, const float2 dx, const float2 dy)
|
||||
: val(val), dx(dx), dy(dy)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct dual<float3> {
|
||||
float3 val = make_float3(0.0f);
|
||||
float3 dx = make_float3(0.0f);
|
||||
float3 dy = make_float3(0.0f);
|
||||
dual<float3>() = default;
|
||||
ccl_device_inline_method explicit dual<float3>(const float3 val) : val(val) {}
|
||||
ccl_device_inline_method dual<float3>(const float3 val, const float3 dx, const float3 dy)
|
||||
: val(val), dx(dx), dy(dy)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
template<> struct dual<float4> {
|
||||
float4 val = make_float4(0.0f);
|
||||
float4 dx = make_float4(0.0f);
|
||||
float4 dy = make_float4(0.0f);
|
||||
dual<float4>() = default;
|
||||
ccl_device_inline_method explicit dual<float4>(const float4 val) : val(val) {}
|
||||
ccl_device_inline_method dual<float4>(const float4 val, const float4 dx, const float4 dy)
|
||||
: val(val), dx(dx), dy(dy)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
using dual1 = dual<float>;
|
||||
using dual2 = dual<float2>;
|
||||
using dual3 = dual<float3>;
|
||||
using dual4 = dual<float4>;
|
||||
|
||||
template<class T> ccl_device_inline dual3 make_float3(const ccl_private dual<T> &a)
|
||||
{
|
||||
return {make_float3(a.val), make_float3(a.dx), make_float3(a.dy)};
|
||||
}
|
||||
|
||||
ccl_device_inline dual3 make_float3(const dual1 a, const dual1 b, const dual1 c)
|
||||
{
|
||||
return {make_float3(a.val, b.val, c.val),
|
||||
make_float3(a.dx, b.dx, c.dx),
|
||||
make_float3(a.dy, b.dy, c.dy)};
|
||||
}
|
||||
|
||||
ccl_device_inline dual4 make_float4(const dual3 a)
|
||||
{
|
||||
return {make_float4(a.val), make_float4(a.dx, 0.0f), make_float4(a.dy, 0.0f)};
|
||||
}
|
||||
|
||||
ccl_device_inline dual4 make_homogeneous(const dual3 a)
|
||||
{
|
||||
return {make_float4(a.val, 1.0f), make_float4(a.dx, 0.0f), make_float4(a.dy, 0.0f)};
|
||||
}
|
||||
|
||||
ccl_device_inline void print_dual1(const ccl_private char *label, const dual1 a)
|
||||
{
|
||||
#ifdef __KERNEL_PRINTF__
|
||||
printf("%s: {\nval = %.8f\n dx = %.8f\n dy = %.8f\n}\n",
|
||||
label,
|
||||
(double)a.val,
|
||||
(double)a.dx,
|
||||
(double)a.dy);
|
||||
#else
|
||||
(void)label;
|
||||
(void)a;
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline void print_dual2(const ccl_private char *label, const dual2 a)
|
||||
{
|
||||
#ifdef __KERNEL_PRINTF__
|
||||
printf("%s: {\nval = %.8f %.8f\n dx = %.8f %.8f\n dy = %.8f %.8f\n}\n",
|
||||
label,
|
||||
(double)a.val.x,
|
||||
(double)a.val.y,
|
||||
(double)a.dx.x,
|
||||
(double)a.dx.y,
|
||||
(double)a.dy.x,
|
||||
(double)a.dy.y);
|
||||
#else
|
||||
(void)label;
|
||||
(void)a;
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline void print_dual3(const ccl_private char *label, const dual3 a)
|
||||
{
|
||||
#ifdef __KERNEL_PRINTF__
|
||||
printf("%s: {\nval = %.8f %.8f %.8f\n dx = %.8f %.8f %.8f\n dy = %.8f %.8f %.8f\n}\n",
|
||||
label,
|
||||
(double)a.val.x,
|
||||
(double)a.val.y,
|
||||
(double)a.val.z,
|
||||
(double)a.dx.x,
|
||||
(double)a.dx.y,
|
||||
(double)a.dx.z,
|
||||
(double)a.dy.x,
|
||||
(double)a.dy.y,
|
||||
(double)a.dy.z);
|
||||
#else
|
||||
(void)label;
|
||||
(void)a;
|
||||
#endif
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
@@ -92,6 +92,11 @@ ccl_device_inline float4 make_float4(const float3 a)
|
||||
return make_float4(a.x, a.y, a.z, 1.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float4 make_homogeneous(const float3 a)
|
||||
{
|
||||
return make_float4(a.x, a.y, a.z, 1.0f);
|
||||
}
|
||||
|
||||
ccl_device_inline float4 make_float4(const int4 i)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
|
||||
Reference in New Issue
Block a user