From 4cd10ddfdb1e5a1c7320162d0a3e341b166b598d Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Mon, 3 Jul 2023 18:26:02 +0200 Subject: [PATCH] Fix naive 4x4 matrix multiplication C++ implementation Is not visible on any of the officially platforms, as everywhere SSE2 is available (on Apple Silicon via sse2neon). Only got noticed by some intermittent issue during development which made BLI_HAVE_SSE2 unaccessible. Seems that transpose was done a bit wrong. Not sure if worth trying to fold the equation into C++ types, as that requires extra memory transfers for transpose. Opted for a more naive folding, which avoids extra copies. Added a regression test for it, verified against numpy, the BLI SSE2 implementation. --- source/blender/blenlib/intern/math_matrix.cc | 24 +++++++++++---- .../blenlib/tests/BLI_math_matrix_test.cc | 30 +++++++++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/source/blender/blenlib/intern/math_matrix.cc b/source/blender/blenlib/intern/math_matrix.cc index f590bbb726b..349b9d70035 100644 --- a/source/blender/blenlib/intern/math_matrix.cc +++ b/source/blender/blenlib/intern/math_matrix.cc @@ -43,13 +43,27 @@ template<> float4x4 float4x4::operator*(const float4x4 &b) const _mm_store_ps(result[i], sum); } #else - const float4x4 T = transpose(b); + result[0][0] = b[0][0] * a[0][0] + b[0][1] * a[1][0] + b[0][2] * a[2][0] + b[0][3] * a[3][0]; + result[0][1] = b[0][0] * a[0][1] + b[0][1] * a[1][1] + b[0][2] * a[2][1] + b[0][3] * a[3][1]; + result[0][2] = b[0][0] * a[0][2] + b[0][1] * a[1][2] + b[0][2] * a[2][2] + b[0][3] * a[3][2]; + result[0][3] = b[0][0] * a[0][3] + b[0][1] * a[1][3] + b[0][2] * a[2][3] + b[0][3] * a[3][3]; - result.x = float4(dot(a.x, T.x), dot(a.x, T.y), dot(a.x, T.z), dot(a.x, T.w)); - result.y = float4(dot(a.y, T.x), dot(a.y, T.y), dot(a.y, T.z), dot(a.y, T.w)); - result.z = float4(dot(a.z, T.x), dot(a.z, T.y), dot(a.z, T.z), dot(a.z, T.w)); - result.w = float4(dot(a.w, T.x), dot(a.w, T.y), dot(a.w, T.z), dot(a.w, T.w)); + result[1][0] = b[1][0] * a[0][0] + b[1][1] * a[1][0] + b[1][2] * a[2][0] + b[1][3] * a[3][0]; + result[1][1] = b[1][0] * a[0][1] + b[1][1] * a[1][1] + b[1][2] * a[2][1] + b[1][3] * a[3][1]; + result[1][2] = b[1][0] * a[0][2] + b[1][1] * a[1][2] + b[1][2] * a[2][2] + b[1][3] * a[3][2]; + result[1][3] = b[1][0] * a[0][3] + b[1][1] * a[1][3] + b[1][2] * a[2][3] + b[1][3] * a[3][3]; + + result[2][0] = b[2][0] * a[0][0] + b[2][1] * a[1][0] + b[2][2] * a[2][0] + b[2][3] * a[3][0]; + result[2][1] = b[2][0] * a[0][1] + b[2][1] * a[1][1] + b[2][2] * a[2][1] + b[2][3] * a[3][1]; + result[2][2] = b[2][0] * a[0][2] + b[2][1] * a[1][2] + b[2][2] * a[2][2] + b[2][3] * a[3][2]; + result[2][3] = b[2][0] * a[0][3] + b[2][1] * a[1][3] + b[2][2] * a[2][3] + b[2][3] * a[3][3]; + + result[3][0] = b[3][0] * a[0][0] + b[3][1] * a[1][0] + b[3][2] * a[2][0] + b[3][3] * a[3][0]; + result[3][1] = b[3][0] * a[0][1] + b[3][1] * a[1][1] + b[3][2] * a[2][1] + b[3][3] * a[3][1]; + result[3][2] = b[3][0] * a[0][2] + b[3][1] * a[1][2] + b[3][2] * a[2][2] + b[3][3] * a[3][2]; + result[3][3] = b[3][0] * a[0][3] + b[3][1] * a[1][3] + b[3][2] * a[2][3] + b[3][3] * a[3][3]; #endif + return result; } diff --git a/source/blender/blenlib/tests/BLI_math_matrix_test.cc b/source/blender/blenlib/tests/BLI_math_matrix_test.cc index cc7e7c00766..4ca51c3c38b 100644 --- a/source/blender/blenlib/tests/BLI_math_matrix_test.cc +++ b/source/blender/blenlib/tests/BLI_math_matrix_test.cc @@ -328,6 +328,36 @@ TEST(math_matrix, MatrixCompareTest) EXPECT_FALSE(is_negative(m6)); } +TEST(math_matrix, MatrixMultiply) +{ + + { + const float4x4 matrix_a = { + {1.0, 2.0, 3.0, 4.0}, + {5.0, 6.0, 7.0, 8.0}, + {9.0, 10.0, 11.0, 12.0}, + {13.0, 14.0, 15.0, 16.0}, + }; + const float4x4 matrix_b = { + {0.1f, 0.2f, 0.3f, 0.4f}, + {0.5f, 0.6f, 0.7f, 0.8f}, + {0.9f, 1.0f, 1.1f, 1.2f}, + {1.3f, 1.4f, 1.5f, 1.6f}, + }; + + const float4x4 expected = { + {9.0f, 10.0f, 11.0f, 12.0f}, + {20.2f, 22.8f, 25.4f, 28.0f}, + {31.4f, 35.6f, 39.8f, 44.0f}, + {42.6f, 48.4f, 54.2f, 60.0f}, + }; + + const float4x4 result = matrix_a * matrix_b; + + EXPECT_M4_NEAR(result, expected, 1e-5f); + } +} + TEST(math_matrix, MatrixToNearestEuler) { EulerXYZ eul1 = EulerXYZ(225.08542, -1.12485, -121.23738);