Files
test/intern/cycles/util/transform.cpp
Patrick Mours d0dd587b60 Fix #108372: GPU implementation of OSL matrix intrinsic functions
All the OSL matrix functions had been implemented using the
`Transform` utility of Cycles, but that's built around a 4x3 matrix,
when the OSL matrix functions are working with 4x4 matrices.
This resulted in them not producing results consistent with the
CPU implementation.

This fixes that by making use of the `ProjectionTransform` utility
of Cycles instead, because it's built around a 4x4 matrix. Since
matrix inversion is required, I had to make a few more utility
functions available on the GPU (except Metal, due to use of
references/pointers without specification) that were previously
CPU-only.

Co-authored-by: Brecht Van Lommel <brecht@blender.org>

Pull Request: https://projects.blender.org/blender/blender/pulls/110102
2024-11-04 17:59:29 +01:00

200 lines
5.1 KiB
C++

/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#include "util/transform.h"
#include "util/projection.h"
#include "util/boundbox.h"
#include "util/math.h"
CCL_NAMESPACE_BEGIN
/* Transform Inverse */
Transform transform_transposed_inverse(const Transform &tfm)
{
ProjectionTransform iprojection(transform_inverse(tfm));
return projection_to_transform(projection_transpose(iprojection));
}
/* Motion Transform */
float4 transform_to_quat(const Transform &tfm)
{
double trace = (double)(tfm[0][0] + tfm[1][1] + tfm[2][2]);
float4 qt;
if (trace > 0.0) {
double s = sqrt(trace + 1.0);
qt.w = (float)(s / 2.0);
s = 0.5 / s;
qt.x = (float)((double)(tfm[2][1] - tfm[1][2]) * s);
qt.y = (float)((double)(tfm[0][2] - tfm[2][0]) * s);
qt.z = (float)((double)(tfm[1][0] - tfm[0][1]) * s);
}
else {
int i = 0;
if (tfm[1][1] > tfm[i][i]) {
i = 1;
}
if (tfm[2][2] > tfm[i][i]) {
i = 2;
}
int j = (i + 1) % 3;
int k = (j + 1) % 3;
double s = sqrt((double)(tfm[i][i] - (tfm[j][j] + tfm[k][k])) + 1.0);
double q[3];
q[i] = s * 0.5;
if (s != 0.0) {
s = 0.5 / s;
}
double w = (double)(tfm[k][j] - tfm[j][k]) * s;
q[j] = (double)(tfm[j][i] + tfm[i][j]) * s;
q[k] = (double)(tfm[k][i] + tfm[i][k]) * s;
qt.x = (float)q[0];
qt.y = (float)q[1];
qt.z = (float)q[2];
qt.w = (float)w;
}
return qt;
}
static void transform_decompose(DecomposedTransform *decomp, const Transform *tfm)
{
/* extract translation */
decomp->y = make_float4(tfm->x.w, tfm->y.w, tfm->z.w, 0.0f);
/* extract rotation */
Transform M = *tfm;
M.x.w = 0.0f;
M.y.w = 0.0f;
M.z.w = 0.0f;
#if 0
Transform R = M;
float norm;
int iteration = 0;
do {
Transform Rnext;
Transform Rit = transform_transposed_inverse(R);
for (int i = 0; i < 3; i++)
for (int j = 0; j < 4; j++)
Rnext[i][j] = 0.5f * (R[i][j] + Rit[i][j]);
norm = 0.0f;
for (int i = 0; i < 3; i++) {
norm = max(norm,
fabsf(R[i][0] - Rnext[i][0]) + fabsf(R[i][1] - Rnext[i][1]) +
fabsf(R[i][2] - Rnext[i][2]));
}
R = Rnext;
iteration++;
} while (iteration < 100 && norm > 1e-4f);
if (transform_negative_scale(R))
R = R * transform_scale(-1.0f, -1.0f, -1.0f);
decomp->x = transform_to_quat(R);
/* extract scale and pack it */
Transform scale = transform_inverse(R) * M;
decomp->y.w = scale.x.x;
decomp->z = make_float4(scale.x.y, scale.x.z, scale.y.x, scale.y.y);
decomp->w = make_float4(scale.y.z, scale.z.x, scale.z.y, scale.z.z);
#else
float3 colx = transform_get_column(&M, 0);
float3 coly = transform_get_column(&M, 1);
float3 colz = transform_get_column(&M, 2);
/* extract scale and shear first */
float3 scale, shear;
scale.x = len(colx);
colx = safe_divide(colx, scale.x);
shear.z = dot(colx, coly);
coly -= shear.z * colx;
scale.y = len(coly);
coly = safe_divide(coly, scale.y);
shear.y = dot(colx, colz);
colz -= shear.y * colx;
shear.x = dot(coly, colz);
colz -= shear.x * coly;
scale.z = len(colz);
colz = safe_divide(colz, scale.z);
transform_set_column(&M, 0, colx);
transform_set_column(&M, 1, coly);
transform_set_column(&M, 2, colz);
if (transform_negative_scale(M)) {
scale *= -1.0f;
M = M * transform_scale(-1.0f, -1.0f, -1.0f);
}
decomp->x = transform_to_quat(M);
decomp->y.w = scale.x;
decomp->z = make_float4(shear.z, shear.y, 0.0f, scale.y);
decomp->w = make_float4(shear.x, 0.0f, 0.0f, scale.z);
#endif
}
void transform_motion_decompose(DecomposedTransform *decomp, const Transform *motion, size_t size)
{
/* Decompose and correct rotation. */
for (size_t i = 0; i < size; i++) {
transform_decompose(decomp + i, motion + i);
if (i > 0) {
/* Ensure rotation around shortest angle, negated quaternions are the same
* but this means we don't have to do the check in quat_interpolate */
if (dot(decomp[i - 1].x, decomp[i].x) < 0.0f) {
decomp[i].x = -decomp[i].x;
}
}
}
/* Copy rotation to decomposed transform where scale is degenerate. This avoids weird object
* rotation interpolation when the scale goes to 0 for a time step.
*
* Note that this is very simple and naive implementation, which only deals with degenerated
* scale happening only on one frame. It is possible to improve it further by interpolating
* rotation into s degenerated range using rotation from time-steps from adjacent non-degenerated
* time steps. */
for (size_t i = 0; i < size; i++) {
const float3 scale = make_float3(decomp[i].y.w, decomp[i].z.w, decomp[i].w.w);
if (!is_zero(scale)) {
continue;
}
if (i > 0) {
decomp[i].x = decomp[i - 1].x;
}
else if (i < size - 1) {
decomp[i].x = decomp[i + 1].x;
}
}
}
Transform transform_from_viewplane(BoundBox2D &viewplane)
{
return transform_scale(1.0f / (viewplane.right - viewplane.left),
1.0f / (viewplane.top - viewplane.bottom),
1.0f) *
transform_translate(-viewplane.left, -viewplane.bottom, 0.0f);
}
CCL_NAMESPACE_END