Fix naive 4x4 matrix multiplication C++ implementation

Is not visible on any of the officially platforms, as everywhere
SSE2 is available (on Apple Silicon via sse2neon).

Only got noticed by some intermittent issue during development
which made BLI_HAVE_SSE2 unaccessible.

Seems that transpose was done a bit wrong. Not sure if worth trying
to fold the equation into C++ types, as that requires extra memory
transfers for transpose. Opted for a more naive folding, which
avoids extra copies.

Added a regression test for it, verified against numpy, the BLI
SSE2 implementation.
This commit is contained in:
Sergey Sharybin
2023-07-03 18:26:02 +02:00
committed by Sergey Sharybin
parent baeb314eb7
commit 4cd10ddfdb
2 changed files with 49 additions and 5 deletions

View File

@@ -43,13 +43,27 @@ template<> float4x4 float4x4::operator*(const float4x4 &b) const
_mm_store_ps(result[i], sum);
}
#else
const float4x4 T = transpose(b);
result[0][0] = b[0][0] * a[0][0] + b[0][1] * a[1][0] + b[0][2] * a[2][0] + b[0][3] * a[3][0];
result[0][1] = b[0][0] * a[0][1] + b[0][1] * a[1][1] + b[0][2] * a[2][1] + b[0][3] * a[3][1];
result[0][2] = b[0][0] * a[0][2] + b[0][1] * a[1][2] + b[0][2] * a[2][2] + b[0][3] * a[3][2];
result[0][3] = b[0][0] * a[0][3] + b[0][1] * a[1][3] + b[0][2] * a[2][3] + b[0][3] * a[3][3];
result.x = float4(dot(a.x, T.x), dot(a.x, T.y), dot(a.x, T.z), dot(a.x, T.w));
result.y = float4(dot(a.y, T.x), dot(a.y, T.y), dot(a.y, T.z), dot(a.y, T.w));
result.z = float4(dot(a.z, T.x), dot(a.z, T.y), dot(a.z, T.z), dot(a.z, T.w));
result.w = float4(dot(a.w, T.x), dot(a.w, T.y), dot(a.w, T.z), dot(a.w, T.w));
result[1][0] = b[1][0] * a[0][0] + b[1][1] * a[1][0] + b[1][2] * a[2][0] + b[1][3] * a[3][0];
result[1][1] = b[1][0] * a[0][1] + b[1][1] * a[1][1] + b[1][2] * a[2][1] + b[1][3] * a[3][1];
result[1][2] = b[1][0] * a[0][2] + b[1][1] * a[1][2] + b[1][2] * a[2][2] + b[1][3] * a[3][2];
result[1][3] = b[1][0] * a[0][3] + b[1][1] * a[1][3] + b[1][2] * a[2][3] + b[1][3] * a[3][3];
result[2][0] = b[2][0] * a[0][0] + b[2][1] * a[1][0] + b[2][2] * a[2][0] + b[2][3] * a[3][0];
result[2][1] = b[2][0] * a[0][1] + b[2][1] * a[1][1] + b[2][2] * a[2][1] + b[2][3] * a[3][1];
result[2][2] = b[2][0] * a[0][2] + b[2][1] * a[1][2] + b[2][2] * a[2][2] + b[2][3] * a[3][2];
result[2][3] = b[2][0] * a[0][3] + b[2][1] * a[1][3] + b[2][2] * a[2][3] + b[2][3] * a[3][3];
result[3][0] = b[3][0] * a[0][0] + b[3][1] * a[1][0] + b[3][2] * a[2][0] + b[3][3] * a[3][0];
result[3][1] = b[3][0] * a[0][1] + b[3][1] * a[1][1] + b[3][2] * a[2][1] + b[3][3] * a[3][1];
result[3][2] = b[3][0] * a[0][2] + b[3][1] * a[1][2] + b[3][2] * a[2][2] + b[3][3] * a[3][2];
result[3][3] = b[3][0] * a[0][3] + b[3][1] * a[1][3] + b[3][2] * a[2][3] + b[3][3] * a[3][3];
#endif
return result;
}

View File

@@ -328,6 +328,36 @@ TEST(math_matrix, MatrixCompareTest)
EXPECT_FALSE(is_negative(m6));
}
TEST(math_matrix, MatrixMultiply)
{
{
const float4x4 matrix_a = {
{1.0, 2.0, 3.0, 4.0},
{5.0, 6.0, 7.0, 8.0},
{9.0, 10.0, 11.0, 12.0},
{13.0, 14.0, 15.0, 16.0},
};
const float4x4 matrix_b = {
{0.1f, 0.2f, 0.3f, 0.4f},
{0.5f, 0.6f, 0.7f, 0.8f},
{0.9f, 1.0f, 1.1f, 1.2f},
{1.3f, 1.4f, 1.5f, 1.6f},
};
const float4x4 expected = {
{9.0f, 10.0f, 11.0f, 12.0f},
{20.2f, 22.8f, 25.4f, 28.0f},
{31.4f, 35.6f, 39.8f, 44.0f},
{42.6f, 48.4f, 54.2f, 60.0f},
};
const float4x4 result = matrix_a * matrix_b;
EXPECT_M4_NEAR(result, expected, 1e-5f);
}
}
TEST(math_matrix, MatrixToNearestEuler)
{
EulerXYZ eul1 = EulerXYZ(225.08542, -1.12485, -121.23738);