to replace some uses of dfdx/dfdy/differentials. No functional change expected. Pull Request: https://projects.blender.org/blender/blender/pulls/143178
651 lines
20 KiB
C++
651 lines
20 KiB
C++
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
|
|
|
#pragma once
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
# include <cstring>
|
|
#endif
|
|
|
|
#include "util/math.h"
|
|
#include "util/types.h"
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
# include "util/system.h"
|
|
#endif
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
/* Affine transformation, stored as 4x3 matrix. */
|
|
|
|
struct Transform {
|
|
float4 x, y, z;
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
float4 operator[](int i) const
|
|
{
|
|
return *(&x + i);
|
|
}
|
|
float4 &operator[](int i)
|
|
{
|
|
return *(&x + i);
|
|
}
|
|
#endif
|
|
};
|
|
|
|
/* Transform decomposed in rotation/translation/scale. we use the same data
|
|
* structure as Transform, and tightly pack decomposition into it. first the
|
|
* rotation (4), then translation (3), then 3x3 scale matrix (9). */
|
|
|
|
struct DecomposedTransform {
|
|
float4 x, y, z, w;
|
|
};
|
|
|
|
/* Functions */
|
|
|
|
#ifdef __KERNEL_METAL__
|
|
/* transform_point specialized for ccl_global */
|
|
ccl_device_inline float3 transform_point(const ccl_global Transform *t, const float3 a)
|
|
{
|
|
const ccl_global float3x3 &b(*(const ccl_global float3x3 *)t);
|
|
return (a * b).xyz + make_float3(t->x.w, t->y.w, t->z.w);
|
|
}
|
|
#endif
|
|
|
|
ccl_device_inline float3 transform_point(const ccl_private Transform *t, const float3 a)
|
|
{
|
|
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
|
|
const float4 aa(a.m128);
|
|
float4 x = t->x;
|
|
float4 y = t->y;
|
|
float4 z = t->z;
|
|
float4 w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f));
|
|
_MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128);
|
|
float4 tmp = w;
|
|
tmp = madd(shuffle<2>(aa), z, tmp);
|
|
tmp = madd(shuffle<1>(aa), y, tmp);
|
|
tmp = madd(shuffle<0>(aa), x, tmp);
|
|
return float3(tmp.m128);
|
|
#elif defined(__KERNEL_METAL__)
|
|
const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t);
|
|
return (a * b).xyz + make_float3(t->x.w, t->y.w, t->z.w);
|
|
#else
|
|
const float4 a_ = make_homogeneous(a);
|
|
return make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z));
|
|
#endif
|
|
}
|
|
|
|
ccl_device_inline dual3 transform_point(const ccl_private Transform *t, const dual3 a)
|
|
{
|
|
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
|
|
/* NOTE: `dot()` has large lantency on Intel platforms, the following method of transpose + madd
|
|
* is faster. However, we did not measure on Neon platforms, it might be that `dot()` is fine
|
|
* there, and we can use the simpler implementation at the end of the function. */
|
|
float4 x = t->x;
|
|
float4 y = t->y;
|
|
float4 z = t->z;
|
|
float4 w(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f));
|
|
_MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128);
|
|
|
|
float4 tmp = w;
|
|
dual3 result;
|
|
{
|
|
const float4 aa(a.val.m128);
|
|
tmp = madd(shuffle<2>(aa), z, tmp);
|
|
tmp = madd(shuffle<1>(aa), y, tmp);
|
|
tmp = madd(shuffle<0>(aa), x, tmp);
|
|
result.val = float3(tmp.m128);
|
|
}
|
|
|
|
{
|
|
const float4 dx(a.dx.m128);
|
|
tmp = shuffle<2>(dx) * z;
|
|
tmp = madd(shuffle<1>(dx), y, tmp);
|
|
tmp = madd(shuffle<0>(dx), x, tmp);
|
|
result.dx = float3(tmp.m128);
|
|
}
|
|
|
|
{
|
|
const float4 dy(a.dy.m128);
|
|
tmp = shuffle<2>(dy) * z;
|
|
tmp = madd(shuffle<1>(dy), y, tmp);
|
|
tmp = madd(shuffle<0>(dy), x, tmp);
|
|
result.dy = float3(tmp.m128);
|
|
}
|
|
|
|
return result;
|
|
#elif defined(__KERNEL_METAL__)
|
|
const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t);
|
|
return {(a.val * b).xyz + make_float3(t->x.w, t->y.w, t->z.w), (a.dx * b).xyz, (a.dy * b).xyz};
|
|
#else
|
|
const dual4 a_ = make_homogeneous(a);
|
|
return make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z));
|
|
#endif
|
|
}
|
|
|
|
ccl_device_inline float3 transform_direction(const ccl_private Transform *t, const float3 a)
|
|
{
|
|
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE2__)
|
|
const float4 aa(a.m128);
|
|
float4 x = t->x;
|
|
float4 y = t->y;
|
|
float4 z = t->z;
|
|
float4 w(_mm_setzero_ps());
|
|
_MM_TRANSPOSE4_PS(x.m128, y.m128, z.m128, w.m128);
|
|
float4 tmp = shuffle<2>(aa) * z;
|
|
tmp = madd(shuffle<1>(aa), y, tmp);
|
|
tmp = madd(shuffle<0>(aa), x, tmp);
|
|
return float3(tmp.m128);
|
|
#elif defined(__KERNEL_METAL__)
|
|
const ccl_private float3x3 &b(*(const ccl_private float3x3 *)t);
|
|
return (a * b).xyz;
|
|
#else
|
|
const float4 a_ = make_float4(a, 0.0f);
|
|
return make_float3(dot(a_, t->x), dot(a_, t->y), dot(a_, t->z));
|
|
#endif
|
|
}
|
|
|
|
ccl_device_inline float3 transform_direction_transposed(const ccl_private Transform *t,
|
|
const float3 a)
|
|
{
|
|
const float3 x = make_float3(t->x.x, t->y.x, t->z.x);
|
|
const float3 y = make_float3(t->x.y, t->y.y, t->z.y);
|
|
const float3 z = make_float3(t->x.z, t->y.z, t->z.z);
|
|
|
|
return make_float3(dot(x, a), dot(y, a), dot(z, a));
|
|
}
|
|
|
|
ccl_device_inline Transform make_transform(const float a,
|
|
const float b,
|
|
const float c,
|
|
const float d,
|
|
const float e,
|
|
const float f,
|
|
const float g,
|
|
const float h,
|
|
const float i,
|
|
const float j,
|
|
const float k,
|
|
const float l)
|
|
{
|
|
Transform t;
|
|
|
|
t.x.x = a;
|
|
t.x.y = b;
|
|
t.x.z = c;
|
|
t.x.w = d;
|
|
t.y.x = e;
|
|
t.y.y = f;
|
|
t.y.z = g;
|
|
t.y.w = h;
|
|
t.z.x = i;
|
|
t.z.y = j;
|
|
t.z.z = k;
|
|
t.z.w = l;
|
|
|
|
return t;
|
|
}
|
|
|
|
ccl_device_inline Transform make_transform(const float3 x, const float3 y, const float3 z)
|
|
{
|
|
Transform t;
|
|
|
|
t.x = make_float4(x, 0.0f);
|
|
t.y = make_float4(y, 0.0f);
|
|
t.z = make_float4(z, 0.0f);
|
|
|
|
return t;
|
|
}
|
|
|
|
ccl_device_inline Transform euler_to_transform(const float3 euler)
|
|
{
|
|
const float cx = cosf(euler.x);
|
|
const float cy = cosf(euler.y);
|
|
const float cz = cosf(euler.z);
|
|
const float sx = sinf(euler.x);
|
|
const float sy = sinf(euler.y);
|
|
const float sz = sinf(euler.z);
|
|
|
|
Transform t;
|
|
t.x.x = cy * cz;
|
|
t.y.x = cy * sz;
|
|
t.z.x = -sy;
|
|
|
|
t.x.y = sy * sx * cz - cx * sz;
|
|
t.y.y = sy * sx * sz + cx * cz;
|
|
t.z.y = cy * sx;
|
|
|
|
t.x.z = sy * cx * cz + sx * sz;
|
|
t.y.z = sy * cx * sz - sx * cz;
|
|
t.z.z = cy * cx;
|
|
|
|
t.x.w = t.y.w = t.z.w = 0.0f;
|
|
return t;
|
|
}
|
|
|
|
/* Constructs a coordinate frame from a normalized normal. */
|
|
ccl_device_inline Transform make_transform_frame(const float3 N)
|
|
{
|
|
const float3 dx0 = cross(make_float3(1.0f, 0.0f, 0.0f), N);
|
|
const float3 dx1 = cross(make_float3(0.0f, 1.0f, 0.0f), N);
|
|
const float3 dx = normalize((dot(dx0, dx0) > dot(dx1, dx1)) ? dx0 : dx1);
|
|
const float3 dy = normalize(cross(N, dx));
|
|
return make_transform(dx.x, dx.y, dx.z, 0.0f, dy.x, dy.y, dy.z, 0.0f, N.x, N.y, N.z, 0.0f);
|
|
}
|
|
|
|
#if !defined(__KERNEL_METAL__)
|
|
ccl_device_inline Transform operator*(const Transform a, const Transform b)
|
|
{
|
|
const float4 c_x = make_float4(b.x.x, b.y.x, b.z.x, 0.0f);
|
|
const float4 c_y = make_float4(b.x.y, b.y.y, b.z.y, 0.0f);
|
|
const float4 c_z = make_float4(b.x.z, b.y.z, b.z.z, 0.0f);
|
|
const float4 c_w = make_float4(b.x.w, b.y.w, b.z.w, 1.0f);
|
|
|
|
Transform t;
|
|
t.x = make_float4(dot(a.x, c_x), dot(a.x, c_y), dot(a.x, c_z), dot(a.x, c_w));
|
|
t.y = make_float4(dot(a.y, c_x), dot(a.y, c_y), dot(a.y, c_z), dot(a.y, c_w));
|
|
t.z = make_float4(dot(a.z, c_x), dot(a.z, c_y), dot(a.z, c_z), dot(a.z, c_w));
|
|
|
|
return t;
|
|
}
|
|
#endif
|
|
|
|
ccl_device_inline Transform transform_zero()
|
|
{
|
|
Transform zero = {zero_float4(), zero_float4(), zero_float4()};
|
|
return zero;
|
|
}
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
|
|
ccl_device_inline void print_transform(const char *label, const Transform &t)
|
|
{
|
|
print_float4(label, t.x);
|
|
print_float4(label, t.y);
|
|
print_float4(label, t.z);
|
|
printf("\n");
|
|
}
|
|
|
|
ccl_device_inline Transform transform_translate(const float3 t)
|
|
{
|
|
return make_transform(1, 0, 0, t.x, 0, 1, 0, t.y, 0, 0, 1, t.z);
|
|
}
|
|
|
|
ccl_device_inline Transform transform_translate(const float x, const float y, float z)
|
|
{
|
|
return transform_translate(make_float3(x, y, z));
|
|
}
|
|
|
|
ccl_device_inline Transform transform_scale(const float3 s)
|
|
{
|
|
return make_transform(s.x, 0, 0, 0, 0, s.y, 0, 0, 0, 0, s.z, 0);
|
|
}
|
|
|
|
ccl_device_inline Transform transform_scale(const float x, const float y, float z)
|
|
{
|
|
return transform_scale(make_float3(x, y, z));
|
|
}
|
|
|
|
ccl_device_inline Transform transform_rotate(const float angle, float3 axis)
|
|
{
|
|
const float s = sinf(angle);
|
|
const float c = cosf(angle);
|
|
const float t = 1.0f - c;
|
|
|
|
axis = normalize(axis);
|
|
|
|
return make_transform(axis.x * axis.x * t + c,
|
|
axis.x * axis.y * t - s * axis.z,
|
|
axis.x * axis.z * t + s * axis.y,
|
|
0.0f,
|
|
|
|
axis.y * axis.x * t + s * axis.z,
|
|
axis.y * axis.y * t + c,
|
|
axis.y * axis.z * t - s * axis.x,
|
|
0.0f,
|
|
|
|
axis.z * axis.x * t - s * axis.y,
|
|
axis.z * axis.y * t + s * axis.x,
|
|
axis.z * axis.z * t + c,
|
|
0.0f);
|
|
}
|
|
|
|
/* Euler is assumed to be in XYZ order. */
|
|
ccl_device_inline Transform transform_euler(const float3 euler)
|
|
{
|
|
return transform_rotate(euler.z, make_float3(0.0f, 0.0f, 1.0f)) *
|
|
transform_rotate(euler.y, make_float3(0.0f, 1.0f, 0.0f)) *
|
|
transform_rotate(euler.x, make_float3(1.0f, 0.0f, 0.0f));
|
|
}
|
|
|
|
ccl_device_inline Transform transform_identity()
|
|
{
|
|
return transform_scale(1.0f, 1.0f, 1.0f);
|
|
}
|
|
|
|
ccl_device_inline bool operator==(const Transform &A, const Transform &B)
|
|
{
|
|
/* Using memcmp because it returns true for NaN unlike component ==,
|
|
* which we need for set_if_different for node sockets to copy the value. */
|
|
return memcmp(&A, &B, sizeof(Transform)) == 0;
|
|
}
|
|
|
|
ccl_device_inline bool operator!=(const Transform &A, const Transform &B)
|
|
{
|
|
return !(A == B);
|
|
}
|
|
|
|
ccl_device_inline bool transform_equal_threshold(const Transform &A,
|
|
const Transform &B,
|
|
const float threshold)
|
|
{
|
|
for (int x = 0; x < 3; x++) {
|
|
for (int y = 0; y < 4; y++) {
|
|
if (fabsf(A[x][y] - B[x][y]) > threshold) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
ccl_device_inline float3 transform_get_column(const Transform *t, const int column)
|
|
{
|
|
return make_float3(t->x[column], t->y[column], t->z[column]);
|
|
}
|
|
|
|
ccl_device_inline void transform_set_column(Transform *t, const int column, const float3 value)
|
|
{
|
|
t->x[column] = value.x;
|
|
t->y[column] = value.y;
|
|
t->z[column] = value.z;
|
|
}
|
|
|
|
Transform transform_transposed_inverse(const Transform &tfm);
|
|
|
|
ccl_device_inline bool transform_uniform_scale(const Transform &tfm, float &scale)
|
|
{
|
|
/* the epsilon here is quite arbitrary, but this function is only used for
|
|
* surface area and bump, where we expect it to not be so sensitive */
|
|
const float eps = 1e-6f;
|
|
|
|
const float sx = len_squared(make_float3(tfm.x));
|
|
const float sy = len_squared(make_float3(tfm.y));
|
|
const float sz = len_squared(make_float3(tfm.z));
|
|
const float stx = len_squared(transform_get_column(&tfm, 0));
|
|
const float sty = len_squared(transform_get_column(&tfm, 1));
|
|
const float stz = len_squared(transform_get_column(&tfm, 2));
|
|
|
|
if (fabsf(sx - sy) < eps && fabsf(sx - sz) < eps && fabsf(sx - stx) < eps &&
|
|
fabsf(sx - sty) < eps && fabsf(sx - stz) < eps)
|
|
{
|
|
scale = sx;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
ccl_device_inline bool transform_negative_scale(const Transform &tfm)
|
|
{
|
|
const float3 c0 = transform_get_column(&tfm, 0);
|
|
const float3 c1 = transform_get_column(&tfm, 1);
|
|
const float3 c2 = transform_get_column(&tfm, 2);
|
|
|
|
return (dot(cross(c0, c1), c2) < 0.0f);
|
|
}
|
|
|
|
ccl_device_inline Transform transform_clear_scale(const Transform &tfm)
|
|
{
|
|
Transform ntfm = tfm;
|
|
|
|
transform_set_column(&ntfm, 0, normalize(transform_get_column(&ntfm, 0)));
|
|
transform_set_column(&ntfm, 1, normalize(transform_get_column(&ntfm, 1)));
|
|
transform_set_column(&ntfm, 2, normalize(transform_get_column(&ntfm, 2)));
|
|
|
|
return ntfm;
|
|
}
|
|
|
|
ccl_device_inline Transform transform_empty()
|
|
{
|
|
return make_transform(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
|
}
|
|
|
|
#endif
|
|
|
|
/* Motion Transform */
|
|
|
|
ccl_device_inline float4 quat_interpolate(const float4 q1, const float4 q2, const float t)
|
|
{
|
|
/* Optix and MetalRT are using linear interpolation to interpolate motion transformations. */
|
|
#if defined(__KERNEL_GPU_RAYTRACING__)
|
|
return normalize((1.0f - t) * q1 + t * q2);
|
|
#else /* defined(__KERNEL_GPU_RAYTRACING__) */
|
|
/* NOTE: this does not ensure rotation around shortest angle, q1 and q2
|
|
* are assumed to be matched already in transform_motion_decompose */
|
|
const float costheta = dot(q1, q2);
|
|
|
|
/* possible optimization: it might be possible to precompute theta/qperp */
|
|
|
|
if (costheta > 0.9995f) {
|
|
/* linear interpolation in degenerate case */
|
|
return normalize((1.0f - t) * q1 + t * q2);
|
|
}
|
|
/* slerp */
|
|
const float theta = acosf(clamp(costheta, -1.0f, 1.0f));
|
|
const float4 qperp = normalize(q2 - q1 * costheta);
|
|
const float thetap = theta * t;
|
|
return q1 * cosf(thetap) + qperp * sinf(thetap);
|
|
|
|
#endif /* defined(__KERNEL_GPU_RAYTRACING__) */
|
|
}
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
void transform_inverse_cpu_avx2(const Transform &tfm, Transform &itfm);
|
|
#endif
|
|
|
|
/* Custom cross and dot implementations that match Embree bit for bit.
|
|
* Normally we don't use SSE41/AVX outside the kernel, but for this it's
|
|
* important to match exactly for ray tracing precision. */
|
|
|
|
ccl_device_forceinline float3 transform_inverse_cross(const float3 a_, const float3 b_)
|
|
{
|
|
#if defined(__AVX2__) && defined(__KERNEL_SSE2__)
|
|
const __m128 a = (const __m128 &)a_;
|
|
const __m128 b = (const __m128 &)b_;
|
|
const __m128 a_shuffle = _mm_castsi128_ps(
|
|
_mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(3, 0, 2, 1)));
|
|
const __m128 b_shuffle = _mm_castsi128_ps(
|
|
_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(3, 0, 2, 1)));
|
|
const __m128 r = _mm_castsi128_ps(
|
|
_mm_shuffle_epi32(_mm_castps_si128(_mm_fmsub_ps(a, b_shuffle, _mm_mul_ps(a_shuffle, b))),
|
|
_MM_SHUFFLE(3, 0, 2, 1)));
|
|
return (const float3 &)r;
|
|
#endif
|
|
|
|
return cross(a_, b_);
|
|
}
|
|
|
|
ccl_device_forceinline float transform_inverse_dot(const float3 a_, const float3 b_)
|
|
{
|
|
#if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
|
|
const __m128 a = (const __m128 &)a_;
|
|
const __m128 b = (const __m128 &)b_;
|
|
return _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7F));
|
|
#endif
|
|
|
|
return dot(a_, b_);
|
|
}
|
|
|
|
ccl_device_forceinline Transform transform_inverse_impl(const Transform tfm)
|
|
{
|
|
/* This implementation matches the one in Embree exactly, to ensure consistent
|
|
* results with the ray intersection of instances. */
|
|
float3 x = make_float3(tfm.x.x, tfm.y.x, tfm.z.x);
|
|
float3 y = make_float3(tfm.x.y, tfm.y.y, tfm.z.y);
|
|
float3 z = make_float3(tfm.x.z, tfm.y.z, tfm.z.z);
|
|
const float3 w = make_float3(tfm.x.w, tfm.y.w, tfm.z.w);
|
|
|
|
/* Compute determinant. */
|
|
float det = transform_inverse_dot(x, transform_inverse_cross(y, z));
|
|
|
|
if (det == 0.0f) {
|
|
/* Matrix is degenerate (e.g. 0 scale on some axis), ideally we should
|
|
* never be in this situation, but try to invert it anyway with tweak.
|
|
*
|
|
* This logic does not match Embree which would just give an invalid
|
|
* matrix. A better solution would be to remove this and ensure any object
|
|
* matrix is valid. */
|
|
x.x += 1e-8f;
|
|
y.y += 1e-8f;
|
|
z.z += 1e-8f;
|
|
|
|
det = transform_inverse_dot(x, cross(y, z));
|
|
if (det == 0.0f) {
|
|
det = FLT_MAX;
|
|
}
|
|
}
|
|
|
|
/* Divide adjoint matrix by the determinant to compute inverse of 3x3 matrix. */
|
|
const float3 inverse_x = transform_inverse_cross(y, z) / det;
|
|
const float3 inverse_y = transform_inverse_cross(z, x) / det;
|
|
const float3 inverse_z = transform_inverse_cross(x, y) / det;
|
|
|
|
/* Compute translation and fill transform. */
|
|
Transform itfm;
|
|
itfm.x = make_float4(inverse_x, -transform_inverse_dot(inverse_x, w));
|
|
itfm.y = make_float4(inverse_y, -transform_inverse_dot(inverse_y, w));
|
|
itfm.z = make_float4(inverse_z, -transform_inverse_dot(inverse_z, w));
|
|
|
|
return itfm;
|
|
}
|
|
|
|
ccl_device_inline Transform transform_inverse(const Transform tfm)
|
|
{
|
|
/* Optimized transform implementations. */
|
|
#ifndef __KERNEL_GPU__
|
|
if (system_cpu_support_avx2()) {
|
|
Transform itfm;
|
|
transform_inverse_cpu_avx2(tfm, itfm);
|
|
return itfm;
|
|
}
|
|
#endif
|
|
|
|
return transform_inverse_impl(tfm);
|
|
}
|
|
|
|
ccl_device_inline void transform_compose(ccl_private Transform *tfm,
|
|
const ccl_private DecomposedTransform *decomp)
|
|
{
|
|
/* rotation */
|
|
float q0;
|
|
float q1;
|
|
float q2;
|
|
float q3;
|
|
float qda;
|
|
float qdb;
|
|
float qdc;
|
|
float qaa;
|
|
float qab;
|
|
float qac;
|
|
float qbb;
|
|
float qbc;
|
|
float qcc;
|
|
|
|
q0 = M_SQRT2_F * decomp->x.w;
|
|
q1 = M_SQRT2_F * decomp->x.x;
|
|
q2 = M_SQRT2_F * decomp->x.y;
|
|
q3 = M_SQRT2_F * decomp->x.z;
|
|
|
|
qda = q0 * q1;
|
|
qdb = q0 * q2;
|
|
qdc = q0 * q3;
|
|
qaa = q1 * q1;
|
|
qab = q1 * q2;
|
|
qac = q1 * q3;
|
|
qbb = q2 * q2;
|
|
qbc = q2 * q3;
|
|
qcc = q3 * q3;
|
|
|
|
const float3 rotation_x = make_float3(1.0f - qbb - qcc, -qdc + qab, qdb + qac);
|
|
const float3 rotation_y = make_float3(qdc + qab, 1.0f - qaa - qcc, -qda + qbc);
|
|
const float3 rotation_z = make_float3(-qdb + qac, qda + qbc, 1.0f - qaa - qbb);
|
|
|
|
/* scale */
|
|
const float3 scale_x = make_float3(decomp->y.w, decomp->z.z, decomp->w.y);
|
|
const float3 scale_y = make_float3(decomp->z.x, decomp->z.w, decomp->w.z);
|
|
const float3 scale_z = make_float3(decomp->z.y, decomp->w.x, decomp->w.w);
|
|
|
|
/* compose with translation */
|
|
tfm->x = make_float4(
|
|
dot(rotation_x, scale_x), dot(rotation_x, scale_y), dot(rotation_x, scale_z), decomp->y.x);
|
|
tfm->y = make_float4(
|
|
dot(rotation_y, scale_x), dot(rotation_y, scale_y), dot(rotation_y, scale_z), decomp->y.y);
|
|
tfm->z = make_float4(
|
|
dot(rotation_z, scale_x), dot(rotation_z, scale_y), dot(rotation_z, scale_z), decomp->y.z);
|
|
}
|
|
|
|
/* Interpolate from array of decomposed transforms. */
|
|
ccl_device void transform_motion_array_interpolate(ccl_private Transform *tfm,
|
|
const ccl_global DecomposedTransform *motion,
|
|
const uint numsteps,
|
|
const float time)
|
|
{
|
|
/* Figure out which steps we need to interpolate. */
|
|
const int maxstep = numsteps - 1;
|
|
const int step = min((int)(time * maxstep), maxstep - 1);
|
|
const float t = time * maxstep - step;
|
|
|
|
const ccl_global DecomposedTransform *a = motion + step;
|
|
const ccl_global DecomposedTransform *b = motion + step + 1;
|
|
|
|
/* Interpolate rotation, translation and scale. */
|
|
DecomposedTransform decomp;
|
|
decomp.x = quat_interpolate(a->x, b->x, t);
|
|
decomp.y = (1.0f - t) * a->y + t * b->y;
|
|
decomp.z = (1.0f - t) * a->z + t * b->z;
|
|
decomp.w = (1.0f - t) * a->w + t * b->w;
|
|
|
|
/* Compose rotation, translation, scale into matrix. */
|
|
transform_compose(tfm, &decomp);
|
|
}
|
|
|
|
ccl_device_inline bool transform_isfinite_safe(ccl_private Transform *tfm)
|
|
{
|
|
return isfinite_safe(tfm->x) && isfinite_safe(tfm->y) && isfinite_safe(tfm->z);
|
|
}
|
|
|
|
ccl_device_inline bool transform_decomposed_isfinite_safe(ccl_private DecomposedTransform *decomp)
|
|
{
|
|
return isfinite_safe(decomp->x) && isfinite_safe(decomp->y) && isfinite_safe(decomp->z) &&
|
|
isfinite_safe(decomp->w);
|
|
}
|
|
|
|
#ifndef __KERNEL_GPU__
|
|
|
|
class BoundBox2D;
|
|
|
|
ccl_device_inline bool operator==(const DecomposedTransform &A, const DecomposedTransform &B)
|
|
{
|
|
return memcmp(&A, &B, sizeof(DecomposedTransform)) == 0;
|
|
}
|
|
|
|
float4 transform_to_quat(const Transform &tfm);
|
|
void transform_motion_decompose(DecomposedTransform *decomp,
|
|
const Transform *motion,
|
|
const size_t size);
|
|
Transform transform_from_viewplane(BoundBox2D &viewplane);
|
|
|
|
#endif
|
|
|
|
/* TODO: This can be removed when we know if no devices will require explicit
|
|
* address space qualifiers for this case. */
|
|
|
|
#define transform_point_auto transform_point
|
|
#define transform_direction_auto transform_direction
|
|
#define transform_direction_transposed_auto transform_direction_transposed
|
|
|
|
CCL_NAMESPACE_END
|