From ea45c776fdf47ec35a55173f23fc33afc74ebba3 Mon Sep 17 00:00:00 2001 From: Weizhen Huang Date: Thu, 24 Jul 2025 12:05:01 +0200 Subject: [PATCH] Cycles: introduce dual types to replace some uses of dfdx/dfdy/differentials. No functional change expected. Pull Request: https://projects.blender.org/blender/blender/pulls/143178 --- intern/cycles/kernel/CMakeLists.txt | 2 + intern/cycles/kernel/geom/curve.h | 38 ++--- intern/cycles/kernel/geom/object.h | 3 +- intern/cycles/kernel/geom/point.h | 27 +--- intern/cycles/kernel/geom/primitive.h | 52 +++---- intern/cycles/kernel/geom/triangle.h | 34 ++--- intern/cycles/kernel/osl/osl.h | 14 +- intern/cycles/kernel/osl/services.cpp | 169 ++++++--------------- intern/cycles/kernel/osl/services_gpu.h | 145 ++++-------------- intern/cycles/kernel/osl/services_shared.h | 62 ++++++-- intern/cycles/kernel/svm/attribute.h | 16 +- intern/cycles/kernel/svm/bump.h | 13 +- intern/cycles/kernel/svm/closure.h | 4 +- intern/cycles/kernel/svm/displace.h | 4 +- intern/cycles/kernel/svm/tex_coord.h | 8 +- intern/cycles/kernel/svm/util.h | 6 +- intern/cycles/kernel/svm/vertex_color.h | 42 ++--- intern/cycles/kernel/util/differential.h | 5 + intern/cycles/util/CMakeLists.txt | 2 + intern/cycles/util/math.h | 2 + intern/cycles/util/math_dual.h | 59 +++++++ intern/cycles/util/math_float3.h | 7 + intern/cycles/util/math_float4.h | 8 + intern/cycles/util/transform.h | 81 +++++++--- intern/cycles/util/types.h | 2 + intern/cycles/util/types_dual.h | 136 +++++++++++++++++ intern/cycles/util/types_float4.h | 5 + 27 files changed, 511 insertions(+), 435 deletions(-) create mode 100644 intern/cycles/util/math_dual.h create mode 100644 intern/cycles/util/types_dual.h diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 60508f1473b..5ebcbaee76a 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -400,6 +400,7 @@ set(SRC_UTIL_HEADERS ../util/math_int3.h ../util/math_int4.h 
../util/math_int8.h + ../util/math_dual.h ../util/projection.h ../util/projection_inverse.h ../util/rect.h @@ -424,6 +425,7 @@ set(SRC_UTIL_HEADERS ../util/types_uint3.h ../util/types_uint4.h ../util/types_ushort4.h + ../util/types_dual.h ) set(LIB diff --git a/intern/cycles/kernel/geom/curve.h b/intern/cycles/kernel/geom/curve.h index 48830e3b7dc..4ff71fca529 100644 --- a/intern/cycles/kernel/geom/curve.h +++ b/intern/cycles/kernel/geom/curve.h @@ -46,12 +46,13 @@ ccl_device_inline T curve_attribute_dfdy(const ccl_private differential &du, /* Read attributes on various curve elements, and compute the partial derivatives if requested. */ template -ccl_device T curve_attribute(KernelGlobals kg, - const ccl_private ShaderData *sd, - const AttributeDescriptor desc, - ccl_private T *dfdx, - ccl_private T *dfdy) +ccl_device dual curve_attribute(KernelGlobals kg, + const ccl_private ShaderData *sd, + const AttributeDescriptor desc, + const bool dx = false, + const bool dy = false) { + dual result; if (desc.element & (ATTR_ELEMENT_CURVE_KEY | ATTR_ELEMENT_CURVE_KEY_MOTION)) { const KernelCurve curve = kernel_data_fetch(curves, sd->prim); const int k0 = curve.first_key + PRIMITIVE_UNPACK_SEGMENT(sd->type); @@ -61,34 +62,27 @@ ccl_device T curve_attribute(KernelGlobals kg, const T f1 = attribute_data_fetch(kg, desc.offset + k1); # ifdef __RAY_DIFFERENTIALS__ - if (dfdx) { - *dfdx = curve_attribute_dfdx(sd->du, f0, f1); + if (dx) { + result.dx = curve_attribute_dfdx(sd->du, f0, f1); } - if (dfdy) { - *dfdy = curve_attribute_dfdy(sd->du, f0, f1); + if (dy) { + result.dy = curve_attribute_dfdy(sd->du, f0, f1); } # endif - return mix(f0, f1, sd->u); + result.val = mix(f0, f1, sd->u); + return result; } /* idea: we can't derive any useful differentials here, but for tiled * mipmap image caching it would be useful to avoid reading the highest * detail level always. maybe a derivative based on the hair density * could be computed somehow? 
*/ -# ifdef __RAY_DIFFERENTIALS__ - if (dfdx) { - *dfdx = make_zero(); - } - if (dfdy) { - *dfdy = make_zero(); - } -# endif if (desc.element == ATTR_ELEMENT_CURVE) { - return attribute_data_fetch(kg, desc.offset + sd->prim); + return dual(attribute_data_fetch(kg, desc.offset + sd->prim)); } - return make_zero(); + return make_zero>(); } /* Curve thickness */ @@ -127,9 +121,7 @@ ccl_device float curve_random(KernelGlobals kg, const ccl_private ShaderData *sd { if (sd->type & PRIMITIVE_CURVE) { const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_CURVE_RANDOM); - return (desc.offset != ATTR_STD_NOT_FOUND) ? - curve_attribute(kg, sd, desc, nullptr, nullptr) : - 0.0f; + return (desc.offset != ATTR_STD_NOT_FOUND) ? curve_attribute(kg, sd, desc).val : 0.0f; } return 0.0f; } diff --git a/intern/cycles/kernel/geom/object.h b/intern/cycles/kernel/geom/object.h index c45527b760e..2c656d90c3c 100644 --- a/intern/cycles/kernel/geom/object.h +++ b/intern/cycles/kernel/geom/object.h @@ -130,9 +130,10 @@ ccl_device_inline Transform lamp_get_inverse_transform(KernelGlobals kg, /* Transform position from object to world space */ +template ccl_device_inline void object_position_transform(KernelGlobals kg, const ccl_private ShaderData *sd, - ccl_private float3 *P) + ccl_private T *P) { #ifdef __OBJECT_MOTION__ if (sd->object_flag & SD_OBJECT_MOTION) { diff --git a/intern/cycles/kernel/geom/point.h b/intern/cycles/kernel/geom/point.h index a1844288f93..fd2993d66bc 100644 --- a/intern/cycles/kernel/geom/point.h +++ b/intern/cycles/kernel/geom/point.h @@ -22,25 +22,16 @@ CCL_NAMESPACE_BEGIN /* Reading attributes on various point elements */ template -ccl_device T point_attribute(KernelGlobals kg, - const ccl_private ShaderData *sd, - const AttributeDescriptor desc, - ccl_private T *dx, - ccl_private T *dy) +ccl_device dual point_attribute(KernelGlobals kg, + const ccl_private ShaderData *sd, + const AttributeDescriptor desc, + const bool /* dx */ = false, + const bool /* dy 
*/ = false) { -# ifdef __RAY_DIFFERENTIALS__ - if (dx) { - *dx = make_zero(); - } - if (dy) { - *dy = make_zero(); - } -# endif - if (desc.element == ATTR_ELEMENT_VERTEX) { - return attribute_data_fetch(kg, desc.offset + sd->prim); + return dual(attribute_data_fetch(kg, desc.offset + sd->prim)); } - return make_zero(); + return make_zero>(); } /* Point position */ @@ -90,9 +81,7 @@ ccl_device float point_random(KernelGlobals kg, const ccl_private ShaderData *sd { if (sd->type & PRIMITIVE_POINT) { const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POINT_RANDOM); - return (desc.offset != ATTR_STD_NOT_FOUND) ? - point_attribute(kg, sd, desc, nullptr, nullptr) : - 0.0f; + return (desc.offset != ATTR_STD_NOT_FOUND) ? point_attribute(kg, sd, desc).val : 0.0f; } return 0.0f; } diff --git a/intern/cycles/kernel/geom/primitive.h b/intern/cycles/kernel/geom/primitive.h index 0ee6483800d..b6647a5e190 100644 --- a/intern/cycles/kernel/geom/primitive.h +++ b/intern/cycles/kernel/geom/primitive.h @@ -29,44 +29,31 @@ CCL_NAMESPACE_BEGIN * heavy volume interpolation code. 
*/ template -ccl_device_forceinline T primitive_surface_attribute(KernelGlobals kg, - const ccl_private ShaderData *sd, - const AttributeDescriptor desc, - ccl_private T *dfdx, - ccl_private T *dfdy) +ccl_device_forceinline dual primitive_surface_attribute(KernelGlobals kg, + const ccl_private ShaderData *sd, + const AttributeDescriptor desc, + const bool dx = false, + const bool dy = false) { if (desc.element & (ATTR_ELEMENT_OBJECT | ATTR_ELEMENT_MESH)) { - if (dfdx) { - *dfdx = make_zero(); - } - if (dfdy) { - *dfdy = make_zero(); - } - - return attribute_data_fetch(kg, desc.offset); + return dual(attribute_data_fetch(kg, desc.offset)); } if (sd->type & PRIMITIVE_TRIANGLE) { - return triangle_attribute(kg, sd, desc, dfdx, dfdy); + return triangle_attribute(kg, sd, desc, dx, dy); } #ifdef __HAIR__ if (sd->type & PRIMITIVE_CURVE) { - return curve_attribute(kg, sd, desc, dfdx, dfdy); + return curve_attribute(kg, sd, desc, dx, dy); } #endif #ifdef __POINTCLOUD__ else if (sd->type & PRIMITIVE_POINT) { - return point_attribute(kg, sd, desc, dfdx, dfdy); + return point_attribute(kg, sd, desc, dx, dy); } #endif else { - if (dfdx) { - *dfdx = make_zero(); - } - if (dfdy) { - *dfdy = make_zero(); - } - return make_zero(); + return make_zero>(); } } @@ -105,7 +92,7 @@ ccl_device_forceinline float3 primitive_uv(KernelGlobals kg, const ccl_private S return make_float3(0.0f, 0.0f, 0.0f); } - const float2 uv = primitive_surface_attribute(kg, sd, desc, nullptr, nullptr); + const float2 uv = primitive_surface_attribute(kg, sd, desc).val; return make_float3(uv.x, uv.y, 1.0f); } @@ -124,9 +111,8 @@ ccl_device bool primitive_ptex(KernelGlobals kg, return false; } - const float3 uv3 = primitive_surface_attribute(kg, sd, desc_uv, nullptr, nullptr); - const float face_id_f = primitive_surface_attribute( - kg, sd, desc_face_id, nullptr, nullptr); + const float3 uv3 = primitive_surface_attribute(kg, sd, desc_uv).val; + const float face_id_f = primitive_surface_attribute(kg, sd, 
desc_face_id).val; *uv = make_float2(uv3.x, uv3.y); *face_id = (int)face_id_f; @@ -152,7 +138,7 @@ ccl_device float3 primitive_tangent(KernelGlobals kg, ccl_private ShaderData *sd const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_GENERATED); if (desc.offset != ATTR_STD_NOT_FOUND) { - float3 data = primitive_surface_attribute(kg, sd, desc, nullptr, nullptr); + float3 data = primitive_surface_attribute(kg, sd, desc).val; data = make_float3(-(data.y - 0.5f), (data.x - 0.5f), 0.0f); object_normal_transform(kg, sd, &data); return cross(sd->N, normalize(cross(data, sd->N))); @@ -211,11 +197,9 @@ ccl_device_forceinline float4 primitive_motion_vector(KernelGlobals kg, #if defined(__HAIR__) || defined(__POINTCLOUD__) if (is_curve_or_point) { - motion_pre = make_float3( - primitive_surface_attribute(kg, sd, desc, nullptr, nullptr)); + motion_pre = make_float3(primitive_surface_attribute(kg, sd, desc).val); desc.offset += numverts; - motion_post = make_float3( - primitive_surface_attribute(kg, sd, desc, nullptr, nullptr)); + motion_post = make_float3(primitive_surface_attribute(kg, sd, desc).val); /* Curve */ if ((sd->object_flag & SD_OBJECT_HAS_VERTEX_MOTION) == 0) { @@ -228,9 +212,9 @@ ccl_device_forceinline float4 primitive_motion_vector(KernelGlobals kg, if (sd->type & PRIMITIVE_TRIANGLE) { /* Triangle */ - motion_pre = triangle_attribute(kg, sd, desc, nullptr, nullptr); + motion_pre = triangle_attribute(kg, sd, desc).val; desc.offset += numverts; - motion_post = triangle_attribute(kg, sd, desc, nullptr, nullptr); + motion_post = triangle_attribute(kg, sd, desc).val; } } diff --git a/intern/cycles/kernel/geom/triangle.h b/intern/cycles/kernel/geom/triangle.h index ca13bfb0231..2e993f06e71 100644 --- a/intern/cycles/kernel/geom/triangle.h +++ b/intern/cycles/kernel/geom/triangle.h @@ -222,12 +222,13 @@ ccl_device_inline T triangle_attribute_dfdy(const ccl_private differential &du, /* Read attributes on various triangle elements, and compute the partial 
derivatives if requested. */ template -ccl_device T triangle_attribute(KernelGlobals kg, - const ccl_private ShaderData *sd, - const AttributeDescriptor desc, - ccl_private T *dfdx, - ccl_private T *dfdy) +ccl_device dual triangle_attribute(KernelGlobals kg, + const ccl_private ShaderData *sd, + const AttributeDescriptor desc, + const bool dx = false, + const bool dy = false) { + dual result; if (desc.element & (ATTR_ELEMENT_VERTEX | ATTR_ELEMENT_VERTEX_MOTION | ATTR_ELEMENT_CORNER | ATTR_ELEMENT_CORNER_BYTE)) { @@ -256,29 +257,22 @@ ccl_device T triangle_attribute(KernelGlobals kg, } #ifdef __RAY_DIFFERENTIALS__ - if (dfdx) { - *dfdx = triangle_attribute_dfdx(sd->du, sd->dv, f0, f1, f2); + if (dx) { + result.dx = triangle_attribute_dfdx(sd->du, sd->dv, f0, f1, f2); } - if (dfdy) { - *dfdy = triangle_attribute_dfdy(sd->du, sd->dv, f0, f1, f2); + if (dy) { + result.dy = triangle_attribute_dfdy(sd->du, sd->dv, f0, f1, f2); } #endif - return sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0; + result.val = sd->u * f1 + sd->v * f2 + (1.0f - sd->u - sd->v) * f0; + return result; } -#ifdef __RAY_DIFFERENTIALS__ - if (dfdx) { - *dfdx = make_zero(); - } - if (dfdy) { - *dfdy = make_zero(); - } -#endif if (desc.element == ATTR_ELEMENT_FACE) { - return attribute_data_fetch(kg, desc.offset + sd->prim); + return dual(attribute_data_fetch(kg, desc.offset + sd->prim)); } - return make_zero(); + return make_zero>(); } CCL_NAMESPACE_END diff --git a/intern/cycles/kernel/osl/osl.h b/intern/cycles/kernel/osl/osl.h index 3db29cc6a18..b43fb1e5ab1 100644 --- a/intern/cycles/kernel/osl/osl.h +++ b/intern/cycles/kernel/osl/osl.h @@ -222,18 +222,16 @@ ccl_device_inline void osl_eval_nodes(KernelGlobals kg, const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); kernel_assert(desc.offset != ATTR_STD_NOT_FOUND); - differential3 tmp_dP; - sd->P = primitive_surface_attribute(kg, sd, desc, &tmp_dP.dx, &tmp_dP.dy); + dual3 P = primitive_surface_attribute(kg, 
sd, desc, true, true); - object_position_transform(kg, sd, &sd->P); - object_dir_transform(kg, sd, &tmp_dP.dx); - object_dir_transform(kg, sd, &tmp_dP.dy); + object_position_transform(kg, sd, &P); - sd->dP = differential_make_compact(tmp_dP); + sd->P = P.val; + sd->dP = differential_make_compact(P); globals.P = sd->P; - globals.dPdx = tmp_dP.dx; - globals.dPdy = tmp_dP.dy; + globals.dPdx = P.dx; + globals.dPdy = P.dy; } /* Execute bump shader. */ diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 9e936c88c8a..3435f7fe7d8 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -409,118 +409,63 @@ bool OSLRenderServices::get_array_attribute(OSL::ShaderGlobals * /*sg*/, return false; } -template -inline bool set_attribute( - const T v, const T dx, const T dy, TypeDesc type, bool derivatives, void *val); - -inline void set_data_float( - const float v, const float dx, const float dy, bool derivatives, void *val) -{ - float *fval = static_cast(val); - fval[0] = v; - if (derivatives) { - fval[1] = dx; - fval[2] = dy; - } -} - -inline void set_data_float3( - const float3 v, const float3 dx, const float3 dy, bool derivatives, void *val) -{ - float *fval = static_cast(val); - fval[0] = v.x; - fval[1] = v.y; - fval[2] = v.z; - if (derivatives) { - fval[3] = dx.x; - fval[4] = dx.y; - fval[5] = dx.z; - fval[6] = dy.x; - fval[7] = dy.y; - fval[8] = dy.z; - } -} - -inline void set_data_float4( - const float4 v, const float4 dx, const float4 dy, bool derivatives, void *val) -{ - float *fval = static_cast(val); - fval[0] = v.x; - fval[1] = v.y; - fval[2] = v.z; - fval[3] = v.w; - if (derivatives) { - fval[4] = dx.x; - fval[5] = dx.y; - fval[6] = dx.z; - fval[7] = dx.w; - fval[8] = dy.x; - fval[9] = dy.y; - fval[10] = dy.z; - fval[11] = dy.w; - } -} - -ccl_device_template_spec bool set_attribute( - const float v, const float dx, const float dy, TypeDesc type, bool derivatives, void *val) 
+ccl_device_template_spec bool set_attribute(const dual1 v, + TypeDesc type, + bool derivatives, + void *val) { if (type == TypeFloatArray4) { - set_data_float4(make_float4(v, v, v, 1.0f), - make_float4(dx, dx, dx, 0.0f), - make_float4(dy, dy, dy, 0.0f), - derivatives, - val); + set_data_float4(make_float4(make_float3(v)), derivatives, val); return true; } if (type == TypePoint || type == TypeVector || type == TypeNormal || type == TypeColor) { - set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val); + set_data_float3(make_float3(v), derivatives, val); return true; } if (type == TypeFloat) { - set_data_float(v, dx, dy, derivatives, val); + set_data_float(v, derivatives, val); return true; } return false; } -ccl_device_template_spec bool set_attribute( - const float2 v, const float2 dx, const float2 dy, TypeDesc type, bool derivatives, void *val) +ccl_device_template_spec bool set_attribute(const dual2 v, + TypeDesc type, + bool derivatives, + void *val) { if (type == TypeFloatArray4) { - set_data_float4(make_float4(v.x, v.y, 0.0f, 1.0f), - make_float4(dx.x, dx.y, 0.0f, 0.0f), - make_float4(dy.x, dy.y, 0.0f, 0.0f), - derivatives, - val); + set_data_float4(make_float4(make_float3(v)), derivatives, val); return true; } if (type == TypePoint || type == TypeVector || type == TypeNormal || type == TypeColor) { - set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val); + set_data_float3(make_float3(v), derivatives, val); return true; } if (type == TypeFloat) { - set_data_float(average(v), average(dx), average(dy), derivatives, val); + set_data_float(average(v), derivatives, val); return true; } return false; } -ccl_device_template_spec bool set_attribute( - const float3 v, const float3 dx, const float3 dy, TypeDesc type, bool derivatives, void *val) +ccl_device_template_spec bool set_attribute(const dual3 v, + TypeDesc type, + bool derivatives, + void *val) { if (type == TypeFloatArray4) { - set_data_float4( - 
make_float4(v, 1.0f), make_float4(dx, 0.0f), make_float4(dy, 0.0f), derivatives, val); + set_data_float4(make_float4(v), derivatives, val); return true; } if (type == TypePoint || type == TypeVector || type == TypeNormal || type == TypeColor) { - set_data_float3(v, dx, dy, derivatives, val); + set_data_float3(v, derivatives, val); return true; } if (type == TypeFloat) { - set_data_float(average(v), average(dx), average(dy), derivatives, val); + set_data_float(average(v), derivatives, val); return true; } @@ -535,23 +480,21 @@ ccl_device_template_spec bool set_attribute( * this for the correct operation of the Attribute node. */ -ccl_device_template_spec bool set_attribute( - const float4 v, const float4 dx, const float4 dy, TypeDesc type, bool derivatives, void *val) +ccl_device_template_spec bool set_attribute(const dual4 v, + TypeDesc type, + bool derivatives, + void *val) { if (type == TypeFloatArray4) { - set_data_float4(v, dx, dy, derivatives, val); + set_data_float4(v, derivatives, val); return true; } if (type == TypePoint || type == TypeVector || type == TypeNormal || type == TypeColor) { - set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val); + set_data_float3(make_float3(v), derivatives, val); return true; } if (type == TypeFloat) { - set_data_float(average(make_float3(v)), - average(make_float3(dx)), - average(make_float3(dy)), - derivatives, - val); + set_data_float(average(make_float3(v)), derivatives, val); return true; } return false; @@ -560,7 +503,7 @@ ccl_device_template_spec bool set_attribute( template ccl_device_inline bool set_attribute(const T f, const TypeDesc type, bool derivatives, void *val) { - return set_attribute(f, make_zero(), make_zero(), type, derivatives, val); + return set_attribute(dual(f), type, derivatives, val); } ccl_device_template_spec bool set_attribute(const int i, @@ -610,17 +553,9 @@ static bool set_attribute_float3_3(const float3 P[3], TypeDesc type, bool deriva if (type.vecsemantics == 
TypeDesc::POINT && type.arraylen >= 3) { float *fval = (float *)val; - fval[0] = P[0].x; - fval[1] = P[0].y; - fval[2] = P[0].z; - - fval[3] = P[1].x; - fval[4] = P[1].y; - fval[5] = P[1].z; - - fval[6] = P[2].x; - fval[7] = P[2].y; - fval[8] = P[2].z; + copy_v3_v3(fval, P[0]); + copy_v3_v3(fval + 3, P[1]); + copy_v3_v3(fval + 6, P[2]); if (type.arraylen > 3) { memset(fval + 3 * 3, 0, sizeof(float) * 3 * (type.arraylen - 3)); @@ -653,20 +588,17 @@ inline bool get_object_attribute_impl(const ThreadKernelGlobalsCPU *kg, bool derivatives, void *val) { - T v; - T dx = make_zero(); - T dy = make_zero(); + dual data; #ifdef __VOLUME__ if (primitive_is_volume_attribute(sd)) { - v = primitive_volume_attribute(kg, sd, desc, true); + data.val = primitive_volume_attribute(kg, sd, desc, true); } else #endif { - v = primitive_surface_attribute( - kg, sd, desc, derivatives ? &dx : nullptr, derivatives ? &dy : nullptr); + data = primitive_surface_attribute(kg, sd, desc, derivatives, derivatives); } - return set_attribute(v, dx, dy, type, derivatives, val); + return set_attribute(data, type, derivatives, val); } static bool get_object_attribute(const ThreadKernelGlobalsCPU *kg, @@ -867,11 +799,11 @@ bool OSLRenderServices::get_object_standard_attribute( return false; } if (name == u_bump_map_normal) { - float3 f[3]; + dual3 f; if (!attribute_bump_map_normal(kg, sd, f)) { return false; } - return set_attribute(f[0], f[1], f[2], type, derivatives, val); + return set_attribute(f, type, derivatives, val); } return get_background_attribute(globals, name, type, derivatives, val); } @@ -936,29 +868,24 @@ bool OSLRenderServices::get_background_attribute( if (name == u_ndc) { /* NDC coordinates with special exception for orthographic projection. 
*/ - float3 ndc[3]; + dual3 ndc; if ((globals->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { - ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); - - if (derivatives) { - ndc[1] = zero_float3(); - ndc[2] = zero_float3(); - } + ndc.val = camera_world_to_ndc(kg, sd, sd->ray_P); } else { - ndc[0] = camera_world_to_ndc(kg, sd, sd->P); + ndc.val = camera_world_to_ndc(kg, sd, sd->P); if (derivatives) { const differential3 dP = differential_from_compact(sd->Ng, sd->dP); - ndc[1] = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc[0]; + ndc.dx = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc.val; + ndc.dy = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc.val; } } - return set_attribute(ndc[0], ndc[1], ndc[2], type, derivatives, val); + return set_attribute(ndc, type, derivatives, val); } return false; @@ -1682,17 +1609,17 @@ bool OSLRenderServices::getmessage(OSL::ShaderGlobals *sg, } if (name == u_P) { const differential3 dP = differential_from_compact(sd->Ng, sd->dP); - return set_attribute(sd->P, dP.dx, dP.dy, type, derivatives, val); + return set_attribute(dual3(sd->P, dP.dx, dP.dy), type, derivatives, val); } if (name == u_I) { const differential3 dI = differential_from_compact(sd->wi, sd->dI); - return set_attribute(sd->wi, dI.dx, dI.dy, type, derivatives, val); + return set_attribute(dual3(sd->wi, dI.dx, dI.dy), type, derivatives, val); } if (name == u_u) { - return set_attribute(sd->u, sd->du.dx, sd->du.dy, type, derivatives, val); + return set_attribute(dual1(sd->u, sd->du.dx, sd->du.dy), type, derivatives, val); } if (name == u_v) { - return set_attribute(sd->v, sd->dv.dx, sd->dv.dy, type, derivatives, val); + return set_attribute(dual1(sd->v, sd->dv.dx, sd->dv.dy), type, derivatives, val); } return get_attribute(sg, derivatives, u_empty, type, name, val); diff --git a/intern/cycles/kernel/osl/services_gpu.h 
b/intern/cycles/kernel/osl/services_gpu.h index 9a210aad257..f01bdd11728 100644 --- a/intern/cycles/kernel/osl/services_gpu.h +++ b/intern/cycles/kernel/osl/services_gpu.h @@ -425,67 +425,7 @@ ccl_device_extern bool osl_get_inverse_matrix(ccl_private ShaderGlobals *sg, /* Attributes */ -typedef long long TypeDesc; - -template -ccl_device_inline bool set_attribute(const T v, - const T dx, - const T dy, - const TypeDesc type, - bool derivatives, - ccl_private void *val); - -ccl_device_inline void set_data_float( - const float v, const float dx, const float dy, bool derivatives, ccl_private void *val) -{ - ccl_private float *fval = static_cast(val); - fval[0] = v; - if (derivatives) { - fval[1] = dx; - fval[2] = dy; - } -} - -ccl_device_inline void set_data_float3( - const float3 v, const float3 dx, const float3 dy, bool derivatives, ccl_private void *val) -{ - ccl_private float *fval = static_cast(val); - fval[0] = v.x; - fval[1] = v.y; - fval[2] = v.z; - if (derivatives) { - fval[3] = dx.x; - fval[4] = dx.y; - fval[5] = dx.z; - fval[6] = dy.x; - fval[7] = dy.y; - fval[8] = dy.z; - } -} - -ccl_device_inline void set_data_float4( - const float4 v, const float4 dx, const float4 dy, bool derivatives, ccl_private void *val) -{ - ccl_private float *fval = static_cast(val); - fval[0] = v.x; - fval[1] = v.y; - fval[2] = v.z; - fval[3] = v.w; - if (derivatives) { - fval[4] = dx.x; - fval[5] = dx.y; - fval[6] = dx.z; - fval[7] = dx.w; - fval[8] = dy.x; - fval[9] = dy.y; - fval[10] = dy.z; - fval[11] = dy.w; - } -} - -ccl_device_template_spec bool set_attribute(const float v, - const float dx, - const float dy, +ccl_device_template_spec bool set_attribute(const dual1 v, const TypeDesc type, bool derivatives, ccl_private void *val) @@ -497,29 +437,23 @@ ccl_device_template_spec bool set_attribute(const float v, if (type_basetype == 11 /* TypeDesc::FLOAT */) { if ((type_aggregate == 3 /* TypeDesc::VEC3 */) || (type_aggregate == 1 && type_arraylen == 3)) { - 
set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val); + set_data_float3(make_float3(v), derivatives, val); return true; } if ((type_aggregate == 4 /* TypeDesc::VEC4 */) || (type_aggregate == 1 && type_arraylen == 4)) { - set_data_float4(make_float4(v, v, v, 1.0f), - make_float4(dx, dx, dx, 0.0f), - make_float4(dy, dy, dy, 0.0f), - derivatives, - val); + set_data_float4(make_float4(make_float3(v)), derivatives, val); return true; } if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) { - set_data_float(v, dx, dy, derivatives, val); + set_data_float(v, derivatives, val); return true; } } return false; } -ccl_device_template_spec bool set_attribute(const float2 v, - const float2 dx, - const float2 dy, +ccl_device_template_spec bool set_attribute(const dual2 v, const TypeDesc type, bool derivatives, ccl_private void *val) @@ -531,28 +465,22 @@ ccl_device_template_spec bool set_attribute(const float2 v, if (type_basetype == 11 /* TypeDesc::FLOAT */) { if ((type_aggregate == 3 /* TypeDesc::VEC3 */) || (type_aggregate == 1 && type_arraylen == 3)) { - set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val); + set_data_float3(make_float3(v), derivatives, val); return true; } if ((type_aggregate == 4 /* TypeDesc::VEC4 */) || (type_aggregate == 1 && type_arraylen == 4)) { - set_data_float4(make_float4(v.x, v.y, 0.0f, 1.0f), - make_float4(dx.x, dx.y, 0.0f, 0.0f), - make_float4(dy.x, dy.y, 0.0f, 0.0f), - derivatives, - val); + set_data_float4(make_float4(make_float3(v)), derivatives, val); } if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) { - set_data_float(average(v), average(dx), average(dy), derivatives, val); + set_data_float(average(v), derivatives, val); return true; } } return false; } -ccl_device_template_spec bool set_attribute(const float3 v, - const float3 dx, - const float3 dy, +ccl_device_template_spec bool set_attribute(const dual3 v, const TypeDesc type, bool derivatives, ccl_private void *val) @@ -564,26 
+492,23 @@ ccl_device_template_spec bool set_attribute(const float3 v, if (type_basetype == 11 /* TypeDesc::FLOAT */) { if ((type_aggregate == 3 /* TypeDesc::VEC3 */) || (type_aggregate == 1 && type_arraylen == 3)) { - set_data_float3(v, dx, dy, derivatives, val); + set_data_float3(v, derivatives, val); return true; } if ((type_aggregate == 4 /* TypeDesc::VEC4 */) || (type_aggregate == 1 && type_arraylen == 4)) { - set_data_float4( - make_float4(v, 1.0f), make_float4(dx, 0.0f), make_float4(dy, 0.0f), derivatives, val); + set_data_float4(make_float4(v), derivatives, val); return true; } if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) { - set_data_float(average(v), average(dx), average(dy), derivatives, val); + set_data_float(average(v), derivatives, val); return true; } } return false; } -ccl_device_template_spec bool set_attribute(const float4 v, - const float4 dx, - const float4 dy, +ccl_device_template_spec bool set_attribute(const dual4 v, const TypeDesc type, bool derivatives, ccl_private void *val) @@ -595,20 +520,16 @@ ccl_device_template_spec bool set_attribute(const float4 v, if (type_basetype == 11 /* TypeDesc::FLOAT */) { if ((type_aggregate == 3 /* TypeDesc::VEC3 */) || (type_aggregate == 1 && type_arraylen == 3)) { - set_data_float3(make_float3(v), make_float3(dx), make_float3(dy), derivatives, val); + set_data_float3(make_float3(v), derivatives, val); return true; } if ((type_aggregate == 4 /* TypeDesc::VEC4 */) || (type_aggregate == 1 && type_arraylen == 4)) { - set_data_float4(v, dx, dy, derivatives, val); + set_data_float4(v, derivatives, val); return true; } if ((type_aggregate == 1 /* TypeDesc::SCALAR */)) { - set_data_float(average(make_float3(v)), - average(make_float3(dx)), - average(make_float3(dy)), - derivatives, - val); + set_data_float(average(make_float3(v)), derivatives, val); return true; } } @@ -622,7 +543,7 @@ ccl_device_inline bool set_attribute(const T f, bool derivatives, ccl_private void *val) { - return set_attribute(f, 
make_zero(), make_zero(), type, derivatives, val); + return set_attribute(dual(f), type, derivatives, val); } ccl_device_inline bool set_attribute_matrix(const ccl_private Transform &tfm, @@ -730,29 +651,24 @@ ccl_device_inline bool get_background_attribute(KernelGlobals kg, else if (name == DeviceStrings::u_ndc) { /* NDC coordinates with special exception for orthographic projection. */ - float3 ndc[3]; + dual3 ndc; if ((sg->raytype & PATH_RAY_CAMERA) && sd->object == OBJECT_NONE && kernel_data.cam.type == CAMERA_ORTHOGRAPHIC) { - ndc[0] = camera_world_to_ndc(kg, sd, sd->ray_P); - - if (derivatives) { - ndc[1] = zero_float3(); - ndc[2] = zero_float3(); - } + ndc.val = camera_world_to_ndc(kg, sd, sd->ray_P); } else { - ndc[0] = camera_world_to_ndc(kg, sd, sd->P); + ndc.val = camera_world_to_ndc(kg, sd, sd->P); if (derivatives) { const differential3 dP = differential_from_compact(sd->Ng, sd->dP); - ndc[1] = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc[0]; - ndc[2] = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc[0]; + ndc.dx = camera_world_to_ndc(kg, sd, sd->P + dP.dx) - ndc.val; + ndc.dy = camera_world_to_ndc(kg, sd, sd->P + dP.dy) - ndc.val; } } - return set_attribute(ndc[0], ndc[1], ndc[2], type, derivatives, val); + return set_attribute(ndc, type, derivatives, val); } return false; @@ -766,20 +682,17 @@ ccl_device_inline bool get_object_attribute_impl(KernelGlobals kg, bool derivatives, ccl_private void *val) { - T v; - T dx = make_zero(); - T dy = make_zero(); + dual data; #ifdef __VOLUME__ if (primitive_is_volume_attribute(sd)) { - v = primitive_volume_attribute(kg, sd, desc, true); + data.val = primitive_volume_attribute(kg, sd, desc, true); } else #endif { - v = primitive_surface_attribute( - kg, sd, desc, derivatives ? &dx : nullptr, derivatives ? 
&dy : nullptr); + data = primitive_surface_attribute(kg, sd, desc, derivatives, derivatives); } - return set_attribute(v, dx, dy, type, derivatives, val); + return set_attribute(data, type, derivatives, val); } ccl_device_inline bool get_object_attribute(KernelGlobals kg, @@ -974,11 +887,11 @@ ccl_device_inline bool get_object_standard_attribute(KernelGlobals kg, } } if (name == DeviceStrings::u_bump_map_normal) { - float3 f[3]; + dual3 f; if (!attribute_bump_map_normal(kg, sd, f)) { return false; } - return set_attribute(f[0], f[1], f[2], type, derivatives, val); + return set_attribute(f, type, derivatives, val); } return get_background_attribute(kg, sg, sd, name, type, derivatives, val); diff --git a/intern/cycles/kernel/osl/services_shared.h b/intern/cycles/kernel/osl/services_shared.h index e4daf2e6625..df1742e5294 100644 --- a/intern/cycles/kernel/osl/services_shared.h +++ b/intern/cycles/kernel/osl/services_shared.h @@ -11,11 +11,49 @@ CCL_NAMESPACE_BEGIN -/* TODO: deduplicate function `set_attribute_float3()` in CPU and GPU. 
*/ +#ifdef __KERNEL_OPTIX__ +typedef long long TypeDesc; +#endif + +template +ccl_device_inline bool set_attribute(const dual v, + const TypeDesc type, + bool derivatives, + ccl_private void *val); + +ccl_device_inline void set_data_float(const dual1 data, bool derivatives, ccl_private void *val) +{ + ccl_private float *fval = static_cast(val); + fval[0] = data.val; + if (derivatives) { + fval[1] = data.dx; + fval[2] = data.dy; + } +} + +ccl_device_inline void set_data_float3(const dual3 data, bool derivatives, ccl_private void *val) +{ + ccl_private float *fval = static_cast(val); + copy_v3_v3(fval, data.val); + if (derivatives) { + copy_v3_v3(fval + 3, data.dx); + copy_v3_v3(fval + 6, data.dy); + } +} + +ccl_device_inline void set_data_float4(const dual4 data, bool derivatives, ccl_private void *val) +{ + ccl_private float *fval = static_cast(val); + copy_v4_v4(fval, data.val); + if (derivatives) { + copy_v4_v4(fval + 4, data.dx); + copy_v4_v4(fval + 8, data.dy); + } +} ccl_device bool attribute_bump_map_normal(KernelGlobals kg, ccl_private const ShaderData *sd, - float3 f[3]) + ccl_private dual3 &f) { if (!(sd->type & PRIMITIVE_TRIANGLE) || !(sd->shader & SHADER_SMOOTH_NORMAL)) { /* TODO: implement for curve. */ @@ -29,29 +67,27 @@ ccl_device bool attribute_bump_map_normal(KernelGlobals kg, object_inverse_normal_transform(kg, sd, &Ng); if (sd->type == PRIMITIVE_TRIANGLE) { - f[0] = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v, sd->du, sd->dv, f[1], f[2]); + f.val = triangle_smooth_normal(kg, Ng, sd->prim, sd->u, sd->v, sd->du, sd->dv, f.dx, f.dy); } else { assert(sd->type & PRIMITIVE_MOTION_TRIANGLE); - f[0] = motion_triangle_smooth_normal( - kg, Ng, sd->object, sd->prim, sd->time, sd->u, sd->v, sd->du, sd->dv, f[1], f[2]); + f.val = motion_triangle_smooth_normal( + kg, Ng, sd->object, sd->prim, sd->time, sd->u, sd->v, sd->du, sd->dv, f.dx, f.dy); } if (sd->object_flag & SD_OBJECT_TRANSFORM_APPLIED) { /* Transform to local space. 
*/ - object_inverse_normal_transform(kg, sd, f); - object_inverse_normal_transform(kg, sd, f + 1); - object_inverse_normal_transform(kg, sd, f + 2); + object_inverse_normal_transform(kg, sd, &f.val); + object_inverse_normal_transform(kg, sd, &f.dx); + object_inverse_normal_transform(kg, sd, &f.dy); } if (backfacing) { - f[0] = -f[0]; - f[1] = -f[1]; - f[2] = -f[2]; + f = -f; } - f[1] -= f[0]; - f[2] -= f[0]; + f.dx -= f.val; + f.dy -= f.val; return true; } diff --git a/intern/cycles/kernel/svm/attribute.h b/intern/cycles/kernel/svm/attribute.h index 095fe74ec04..cfaec052321 100644 --- a/intern/cycles/kernel/svm/attribute.h +++ b/intern/cycles/kernel/svm/attribute.h @@ -122,7 +122,7 @@ ccl_device_inline void svm_surface_attr(KernelGlobals kg, ccl_private float *stack, const uint out_offset) { - T f = primitive_surface_attribute(kg, sd, desc, nullptr, nullptr); + T f = primitive_surface_attribute(kg, sd, desc).val; svm_node_attr_store(type, stack, out_offset, f); } @@ -135,10 +135,9 @@ ccl_device_inline void svm_surface_attr_dx(KernelGlobals kg, ccl_private float *stack, const uint out_offset) { - T dfdx; - T f = primitive_surface_attribute(kg, sd, desc, &dfdx, nullptr); - f += dfdx * bump_filter_width; - svm_node_attr_store(type, stack, out_offset, f); + dual f = primitive_surface_attribute(kg, sd, desc, true, false); + f.val += f.dx * bump_filter_width; + svm_node_attr_store(type, stack, out_offset, f.val); } template @@ -150,10 +149,9 @@ ccl_device_inline void svm_surface_attr_dy(KernelGlobals kg, ccl_private float *stack, const uint out_offset) { - T dfdy; - T f = primitive_surface_attribute(kg, sd, desc, nullptr, &dfdy); - f += dfdy * bump_filter_width; - svm_node_attr_store(type, stack, out_offset, f); + dual f = primitive_surface_attribute(kg, sd, desc, false, true); + f.val += f.dy * bump_filter_width; + svm_node_attr_store(type, stack, out_offset, f.val); } template diff --git a/intern/cycles/kernel/svm/bump.h b/intern/cycles/kernel/svm/bump.h index 
0153065aaf5..88c7f95b7a4 100644 --- a/intern/cycles/kernel/svm/bump.h +++ b/intern/cycles/kernel/svm/bump.h @@ -31,19 +31,16 @@ ccl_device_noinline void svm_node_enter_bump_eval(KernelGlobals kg, const AttributeDescriptor desc = find_attribute(kg, sd, ATTR_STD_POSITION_UNDISPLACED); if (desc.offset != ATTR_STD_NOT_FOUND) { - differential3 dP; - float3 P = primitive_surface_attribute(kg, sd, desc, &dP.dx, &dP.dy); + dual3 P = primitive_surface_attribute(kg, sd, desc, true, true); object_position_transform(kg, sd, &P); - object_dir_transform(kg, sd, &dP.dx); - object_dir_transform(kg, sd, &dP.dy); - sd->P = P; - sd->dP = differential_make_compact(dP); + sd->P = P.val; + sd->dP = differential_make_compact(P); /* Save the full differential, the compact form isn't enough for svm_node_set_bump. */ - stack_store_float3(stack, offset + 4, dP.dx); - stack_store_float3(stack, offset + 7, dP.dy); + stack_store_float3(stack, offset + 4, P.dx); + stack_store_float3(stack, offset + 7, P.dy); } } diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h index 91b2da4fa26..3e9932de394 100644 --- a/intern/cycles/kernel/svm/closure.h +++ b/intern/cycles/kernel/svm/closure.h @@ -845,7 +845,7 @@ ccl_device const AttributeDescriptor attr_descr_random = find_attribute(kg, sd, data_node2.y); float random = 0.0f; if (attr_descr_random.offset != ATTR_STD_NOT_FOUND) { - random = primitive_surface_attribute(kg, sd, attr_descr_random, nullptr, nullptr); + random = primitive_surface_attribute(kg, sd, attr_descr_random).val; } else { random = stack_load_float_default(stack, random_ofs, data_node3.y); @@ -977,7 +977,7 @@ ccl_device if (bsdf->aspect_ratio != 1.0f) { /* Align ellipse major axis with the curve normal direction. 
*/ const AttributeDescriptor attr_descr_normal = find_attribute(kg, sd, shared_ofs2); - bsdf->N = curve_attribute(kg, sd, attr_descr_normal, nullptr, nullptr); + bsdf->N = curve_attribute(kg, sd, attr_descr_normal).val; } bsdf->roughness = roughness; diff --git a/intern/cycles/kernel/svm/displace.h b/intern/cycles/kernel/svm/displace.h index 2151f2d2bad..0e8ffd92df4 100644 --- a/intern/cycles/kernel/svm/displace.h +++ b/intern/cycles/kernel/svm/displace.h @@ -206,7 +206,7 @@ ccl_device_noinline int svm_node_vector_displacement(KernelGlobals kg, const AttributeDescriptor attr = find_attribute(kg, sd, node.z); float3 tangent; if (attr.offset != ATTR_STD_NOT_FOUND) { - tangent = primitive_surface_attribute(kg, sd, attr, nullptr, nullptr); + tangent = primitive_surface_attribute(kg, sd, attr).val; } else { tangent = normalize(sd->dPdu); @@ -215,7 +215,7 @@ ccl_device_noinline int svm_node_vector_displacement(KernelGlobals kg, float3 bitangent = safe_normalize(cross(normal, tangent)); const AttributeDescriptor attr_sign = find_attribute(kg, sd, node.w); if (attr_sign.offset != ATTR_STD_NOT_FOUND) { - const float sign = primitive_surface_attribute(kg, sd, attr_sign, nullptr, nullptr); + const float sign = primitive_surface_attribute(kg, sd, attr_sign).val; bitangent *= sign; } diff --git a/intern/cycles/kernel/svm/tex_coord.h b/intern/cycles/kernel/svm/tex_coord.h index ac57c8c2fab..3a0ae18b1ba 100644 --- a/intern/cycles/kernel/svm/tex_coord.h +++ b/intern/cycles/kernel/svm/tex_coord.h @@ -356,8 +356,8 @@ ccl_device_noinline void svm_node_normal_map(KernelGlobals kg, } /* get _unnormalized_ interpolated normal and tangent */ - const float3 tangent = primitive_surface_attribute(kg, sd, attr, nullptr, nullptr); - const float sign = primitive_surface_attribute(kg, sd, attr_sign, nullptr, nullptr); + const float3 tangent = primitive_surface_attribute(kg, sd, attr).val; + const float sign = primitive_surface_attribute(kg, sd, attr_sign).val; float3 normal; if (sd->shader & 
SHADER_SMOOTH_NORMAL) { @@ -441,13 +441,13 @@ ccl_device_noinline void svm_node_tangent(KernelGlobals kg, const AttributeDescriptor desc = find_attribute(kg, sd, node.z); if (desc.offset != ATTR_STD_NOT_FOUND) { if (desc.type == NODE_ATTR_FLOAT2) { - const float2 value = primitive_surface_attribute(kg, sd, desc, nullptr, nullptr); + const float2 value = primitive_surface_attribute(kg, sd, desc).val; attribute_value.x = value.x; attribute_value.y = value.y; attribute_value.z = 0.0f; } else { - attribute_value = primitive_surface_attribute(kg, sd, desc, nullptr, nullptr); + attribute_value = primitive_surface_attribute(kg, sd, desc).val; } } diff --git a/intern/cycles/kernel/svm/util.h b/intern/cycles/kernel/svm/util.h index 7863671b2ef..16328347d92 100644 --- a/intern/cycles/kernel/svm/util.h +++ b/intern/cycles/kernel/svm/util.h @@ -24,11 +24,7 @@ ccl_device_inline float3 stack_load_float3(const ccl_private float *stack, const ccl_device_inline void stack_store_float3(ccl_private float *stack, const uint a, const float3 f) { kernel_assert(a + 2 < SVM_STACK_SIZE); - - ccl_private float *stack_a = stack + a; - stack_a[0] = f.x; - stack_a[1] = f.y; - stack_a[2] = f.z; + copy_v3_v3(stack + a, f); } ccl_device_inline float stack_load_float(const ccl_private float *stack, const uint a) diff --git a/intern/cycles/kernel/svm/vertex_color.h b/intern/cycles/kernel/svm/vertex_color.h index 8dd8ad841d5..c595c857ceb 100644 --- a/intern/cycles/kernel/svm/vertex_color.h +++ b/intern/cycles/kernel/svm/vertex_color.h @@ -24,14 +24,12 @@ ccl_device_noinline void svm_node_vertex_color(KernelGlobals kg, const AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { if (descriptor.type == NODE_ATTR_FLOAT4 || descriptor.type == NODE_ATTR_RGBA) { - const float4 vertex_color = primitive_surface_attribute( - kg, sd, descriptor, nullptr, nullptr); + const float4 vertex_color = primitive_surface_attribute(kg, sd, descriptor).val; 
stack_store_float3(stack, color_offset, make_float3(vertex_color)); stack_store_float(stack, alpha_offset, vertex_color.w); } else { - const float3 vertex_color = primitive_surface_attribute( - kg, sd, descriptor, nullptr, nullptr); + const float3 vertex_color = primitive_surface_attribute(kg, sd, descriptor).val; stack_store_float3(stack, color_offset, vertex_color); stack_store_float(stack, alpha_offset, 1.0f); } @@ -56,19 +54,15 @@ ccl_device_noinline void svm_node_vertex_color_bump_dx(KernelGlobals kg, const AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { if (descriptor.type == NODE_ATTR_FLOAT4 || descriptor.type == NODE_ATTR_RGBA) { - float4 dfdx; - float4 vertex_color = primitive_surface_attribute( - kg, sd, descriptor, &dfdx, nullptr); - vertex_color += dfdx * bump_filter_width; - stack_store_float3(stack, color_offset, make_float3(vertex_color)); - stack_store_float(stack, alpha_offset, vertex_color.w); + dual4 vertex_color = primitive_surface_attribute(kg, sd, descriptor, true, false); + vertex_color.val += vertex_color.dx * bump_filter_width; + stack_store_float3(stack, color_offset, make_float3(vertex_color.val)); + stack_store_float(stack, alpha_offset, vertex_color.val.w); } else { - float3 dfdx; - float3 vertex_color = primitive_surface_attribute( - kg, sd, descriptor, &dfdx, nullptr); - vertex_color += dfdx * bump_filter_width; - stack_store_float3(stack, color_offset, vertex_color); + dual3 vertex_color = primitive_surface_attribute(kg, sd, descriptor, true, false); + vertex_color.val += vertex_color.dx * bump_filter_width; + stack_store_float3(stack, color_offset, vertex_color.val); stack_store_float(stack, alpha_offset, 1.0f); } } @@ -92,19 +86,15 @@ ccl_device_noinline void svm_node_vertex_color_bump_dy(KernelGlobals kg, const AttributeDescriptor descriptor = find_attribute(kg, sd, layer_id); if (descriptor.offset != ATTR_STD_NOT_FOUND) { if (descriptor.type == NODE_ATTR_FLOAT4 || 
descriptor.type == NODE_ATTR_RGBA) { - float4 dfdy; - float4 vertex_color = primitive_surface_attribute( - kg, sd, descriptor, nullptr, &dfdy); - vertex_color += dfdy * bump_filter_width; - stack_store_float3(stack, color_offset, make_float3(vertex_color)); - stack_store_float(stack, alpha_offset, vertex_color.w); + dual4 vertex_color = primitive_surface_attribute(kg, sd, descriptor, false, true); + vertex_color.val += vertex_color.dy * bump_filter_width; + stack_store_float3(stack, color_offset, make_float3(vertex_color.val)); + stack_store_float(stack, alpha_offset, vertex_color.val.w); } else { - float3 dfdy; - float3 vertex_color = primitive_surface_attribute( - kg, sd, descriptor, nullptr, &dfdy); - vertex_color += dfdy * bump_filter_width; - stack_store_float3(stack, color_offset, vertex_color); + dual3 vertex_color = primitive_surface_attribute(kg, sd, descriptor, false, true); + vertex_color.val += vertex_color.dy * bump_filter_width; + stack_store_float3(stack, color_offset, vertex_color.val); stack_store_float(stack, alpha_offset, 1.0f); } } diff --git a/intern/cycles/kernel/util/differential.h b/intern/cycles/kernel/util/differential.h index 61338c2a08c..b41d17b71b2 100644 --- a/intern/cycles/kernel/util/differential.h +++ b/intern/cycles/kernel/util/differential.h @@ -125,6 +125,11 @@ ccl_device_forceinline float differential_make_compact(const differential3 dD) return 0.5f * (len(dD.dx) + len(dD.dy)); } +ccl_device_forceinline float differential_make_compact(const dual3 D) +{ + return 0.5f * (len(D.dx) + len(D.dy)); +} + ccl_device_forceinline float differential_incoming_compact(const float dD) { return dD; diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 8926376c305..f6ec019ede3 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -75,6 +75,7 @@ set(SRC_HEADERS math_int3.h math_int4.h math_int8.h + math_dual.h md5.h murmurhash.h nanovdb.h @@ -121,6 +122,7 @@ set(SRC_HEADERS 
types_uint3.h types_uint4.h types_ushort4.h + types_dual.h unique_ptr.h unique_ptr_vector.h vector.h diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h index 999ed80abd6..2248d0ecd43 100644 --- a/intern/cycles/util/math.h +++ b/intern/cycles/util/math.h @@ -19,4 +19,6 @@ #include "util/math_float3.h" // IWYU pragma: export +#include "util/math_dual.h" // IWYU pragma: export + #include "util/rect.h" // IWYU pragma: export diff --git a/intern/cycles/util/math_dual.h b/intern/cycles/util/math_dual.h new file mode 100644 index 00000000000..0f489885229 --- /dev/null +++ b/intern/cycles/util/math_dual.h @@ -0,0 +1,59 @@ +/* SPDX-FileCopyrightText: 2025 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +#include "util/math_base.h" +#include "util/types_dual.h" + +CCL_NAMESPACE_BEGIN + +ccl_device_template_spec dual1 make_zero() +{ + return dual1(); +} + +ccl_device_template_spec dual2 make_zero() +{ + return dual2(); +} + +ccl_device_template_spec dual3 make_zero() +{ + return dual3(); +} + +ccl_device_template_spec dual4 make_zero() +{ + return dual4(); +} + +/* Multiplication of dual by scalar. */ +template ccl_device_inline dual operator*(const dual a, T2 b) +{ + return {a.val * b, a.dx * b, a.dy * b}; +} + +/* Negation. 
*/ +template ccl_device_inline dual operator-(const ccl_private dual &a) +{ + return {-a.val, -a.dx, -a.dy}; +} + +template ccl_device_inline dual1 average(const dual a) +{ + return {average(a.val), average(a.dx), average(a.dy)}; +} + +template ccl_device_inline dual1 reduce_add(const dual a) +{ + return {reduce_add(a.val), reduce_add(a.dx), reduce_add(a.dy)}; +} + +template ccl_device_inline dual1 dot(const dual a, const T2 b) +{ + return reduce_add(a * b); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h index d14a84ca701..6742122c889 100644 --- a/intern/cycles/util/math_float3.h +++ b/intern/cycles/util/math_float3.h @@ -749,4 +749,11 @@ ccl_device_inline float2 map_to_sphere(const float3 co) return make_float2(u, v); } +ccl_device_inline void copy_v3_v3(ccl_private float *r, const float3 val) +{ + r[0] = val.x; + r[1] = val.y; + r[2] = val.z; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_float4.h b/intern/cycles/util/math_float4.h index ab36a16219d..de7e2f360c7 100644 --- a/intern/cycles/util/math_float4.h +++ b/intern/cycles/util/math_float4.h @@ -655,4 +655,12 @@ ccl_device_inline float4 __int4_as_float4(const int4 i) } #endif /* !defined(__KERNEL_METAL__) && !defined(__KERNEL_ONEAPI__) */ +ccl_device_inline void copy_v4_v4(ccl_private float *r, const float4 val) +{ + r[0] = val.x; + r[1] = val.y; + r[2] = val.z; + r[3] = val.w; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h index 35aa9370395..49e118292c7 100644 --- a/intern/cycles/util/transform.h +++ b/intern/cycles/util/transform.h @@ -55,32 +55,72 @@ ccl_device_inline float3 transform_point(const ccl_global Transform *t, const fl ccl_device_inline float3 transform_point(const ccl_private Transform *t, const float3 a) { - /* TODO(sergey): Disabled for now, causes crashes in certain cases. 
*/ #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) const float4 aa(a.m128); - - float4 x(_mm_loadu_ps(&t->x.x)); - float4 y(_mm_loadu_ps(&t->y.x)); - float4 z(_mm_loadu_ps(&t->z.x)); + float4 x = t->x; + float4 y = t->y; + float4 z = t->z; float4 w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f)); - _MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128); - float4 tmp = w; tmp = madd(shuffle<2>(aa), z, tmp); tmp = madd(shuffle<1>(aa), y, tmp); tmp = madd(shuffle<0>(aa), x, tmp); - return float3(tmp.m128); #elif defined(__KERNEL_METAL__) const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t); return (a * b).xyz + make_float3(t->x.w, t->y.w, t->z.w); #else - float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z + t->x.w, - a.x * t->y.x + a.y * t->y.y + a.z * t->y.z + t->y.w, - a.x * t->z.x + a.y * t->z.y + a.z * t->z.z + t->z.w); + const float4 a_ = make_homogeneous(a); + return make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z)); +#endif +} - return c; +ccl_device_inline dual3 transform_point(const ccl_private Transform *t, const dual3 a) +{ +#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) + /* NOTE: `dot()` has large latency on Intel platforms, the following method of transpose + madd + * is faster. However, we did not measure on Neon platforms, it might be that `dot()` is fine + * there, and we can use the simpler implementation at the end of the function.
*/ + float4 x = t->x; + float4 y = t->y; + float4 z = t->z; + float4 w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f)); + _MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128); + + float4 tmp = w; + dual3 result; + { + const float4 aa(a.val.m128); + tmp = madd(shuffle<2>(aa), z, tmp); + tmp = madd(shuffle<1>(aa), y, tmp); + tmp = madd(shuffle<0>(aa), x, tmp); + result.val = float3(tmp.m128); + } + + { + const float4 dx(a.dx.m128); + tmp = shuffle<2>(dx) * z; + tmp = madd(shuffle<1>(dx), y, tmp); + tmp = madd(shuffle<0>(dx), x, tmp); + result.dx = float3(tmp.m128); + } + + { + const float4 dy(a.dy.m128); + tmp = shuffle<2>(dy) * z; + tmp = madd(shuffle<1>(dy), y, tmp); + tmp = madd(shuffle<0>(dy), x, tmp); + result.dy = float3(tmp.m128); + } + + return result; +#elif defined(__KERNEL_METAL__) + const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t); + return {(a.val * b).xyz + make_float3(t->x.w, t->y.w, t->z.w), (a.dx * b).xyz, (a.dy * b).xyz}; +#else + const dual4 a_ = make_homogeneous(a); + return make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z)); #endif } @@ -88,28 +128,21 @@ ccl_device_inline float3 transform_direction(const ccl_private Transform *t, con { #if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__) const float4 aa(a.m128); - - float4 x(_mm_loadu_ps(&t->x.x)); - float4 y(_mm_loadu_ps(&t->y.x)); - float4 z(_mm_loadu_ps(&t->z.x)); + float4 x = t->x; + float4 y = t->y; + float4 z = t->z; float4 w(_mm_setzero_ps()); - _MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128); - float4 tmp = shuffle<2>(aa) * z; tmp = madd(shuffle<1>(aa), y, tmp); tmp = madd(shuffle<0>(aa), x, tmp); - return float3(tmp.m128); #elif defined(__KERNEL_METAL__) const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t); return (a * b).xyz; #else - float3 c = make_float3(a.x * t->x.x + a.y * t->x.y + a.z * t->x.z, - a.x * t->y.x + a.y * t->y.y + a.z * t->y.z, - a.x * t->z.x + a.y * t->z.y + a.z * t->z.z); - - return c; + const float4 a_ = make_float4(a, 0.0f); + return 
make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z)); #endif } diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h index e4ce7444874..6e75a43d3c8 100644 --- a/intern/cycles/util/types.h +++ b/intern/cycles/util/types.h @@ -27,3 +27,5 @@ #include "util/types_float8.h" // IWYU pragma: export #include "util/types_spectrum.h" // IWYU pragma: export + +#include "util/types_dual.h" // IWYU pragma: export diff --git a/intern/cycles/util/types_dual.h b/intern/cycles/util/types_dual.h new file mode 100644 index 00000000000..24576ba2530 --- /dev/null +++ b/intern/cycles/util/types_dual.h @@ -0,0 +1,136 @@ +/* SPDX-FileCopyrightText: 2025 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +#include "util/types_float2.h" +#include "util/types_float3.h" +#include "util/types_float4.h" + +CCL_NAMESPACE_BEGIN + +template struct dual { + T val, dx, dy; + dual() = default; + ccl_device_inline_method explicit dual(const T val) : val(val) {} + ccl_device_inline_method dual(const T val, const T dx, const T dy) : val(val), dx(dx), dy(dy) + { + } +}; + +template<> struct dual { + float2 val = make_float2(0.0f); + float2 dx = make_float2(0.0f); + float2 dy = make_float2(0.0f); + dual() = default; + ccl_device_inline_method explicit dual(const float2 val) : val(val) {} + ccl_device_inline_method dual(const float2 val, const float2 dx, const float2 dy) + : val(val), dx(dx), dy(dy) + { + } +}; + +template<> struct dual { + float3 val = make_float3(0.0f); + float3 dx = make_float3(0.0f); + float3 dy = make_float3(0.0f); + dual() = default; + ccl_device_inline_method explicit dual(const float3 val) : val(val) {} + ccl_device_inline_method dual(const float3 val, const float3 dx, const float3 dy) + : val(val), dx(dx), dy(dy) + { + } +}; + +template<> struct dual { + float4 val = make_float4(0.0f); + float4 dx = make_float4(0.0f); + float4 dy = make_float4(0.0f); + dual() = default; + ccl_device_inline_method explicit dual(const float4 val) : 
val(val) {} + ccl_device_inline_method dual(const float4 val, const float4 dx, const float4 dy) + : val(val), dx(dx), dy(dy) + { + } +}; + +using dual1 = dual; +using dual2 = dual; +using dual3 = dual; +using dual4 = dual; + +template ccl_device_inline dual3 make_float3(const ccl_private dual &a) +{ + return {make_float3(a.val), make_float3(a.dx), make_float3(a.dy)}; +} + +ccl_device_inline dual3 make_float3(const dual1 a, const dual1 b, const dual1 c) +{ + return {make_float3(a.val, b.val, c.val), + make_float3(a.dx, b.dx, c.dx), + make_float3(a.dy, b.dy, c.dy)}; +} + +ccl_device_inline dual4 make_float4(const dual3 a) +{ + return {make_float4(a.val), make_float4(a.dx, 0.0f), make_float4(a.dy, 0.0f)}; +} + +ccl_device_inline dual4 make_homogeneous(const dual3 a) +{ + return {make_float4(a.val, 1.0f), make_float4(a.dx, 0.0f), make_float4(a.dy, 0.0f)}; +} + +ccl_device_inline void print_dual1(const ccl_private char *label, const dual1 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: {\nval = %.8f\n dx = %.8f\n dy = %.8f\n}\n", + label, + (double)a.val, + (double)a.dx, + (double)a.dy); +#else + (void)label; + (void)a; +#endif +} + +ccl_device_inline void print_dual2(const ccl_private char *label, const dual2 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: {\nval = %.8f %.8f\n dx = %.8f %.8f\n dy = %.8f %.8f\n}\n", + label, + (double)a.val.x, + (double)a.val.y, + (double)a.dx.x, + (double)a.dx.y, + (double)a.dy.x, + (double)a.dy.y); +#else + (void)label; + (void)a; +#endif +} + +ccl_device_inline void print_dual3(const ccl_private char *label, const dual3 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: {\nval = %.8f %.8f %.8f\n dx = %.8f %.8f %.8f\n dy = %.8f %.8f %.8f\n}\n", + label, + (double)a.val.x, + (double)a.val.y, + (double)a.val.z, + (double)a.dx.x, + (double)a.dx.y, + (double)a.dx.z, + (double)a.dy.x, + (double)a.dy.y, + (double)a.dy.z); +#else + (void)label; + (void)a; +#endif +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float4.h 
b/intern/cycles/util/types_float4.h index b4fb8cbcf03..d8980306749 100644 --- a/intern/cycles/util/types_float4.h +++ b/intern/cycles/util/types_float4.h @@ -92,6 +92,11 @@ ccl_device_inline float4 make_float4(const float3 a) return make_float4(a.x, a.y, a.z, 1.0f); } +ccl_device_inline float4 make_homogeneous(const float3 a) +{ + return make_float4(a.x, a.y, a.z, 1.0f); +} + ccl_device_inline float4 make_float4(const int4 i) { #ifdef __KERNEL_SSE__