Fix naive 4x4 matrix multiplication C++ implementation
Is not visible on any of the officially platforms, as everywhere SSE2 is available (on Apple Silicon via sse2neon). Only got noticed by some intermittent issue during development which made BLI_HAVE_SSE2 unaccessible. Seems that transpose was done a bit wrong. Not sure if worth trying to fold the equation into C++ types, as that requires extra memory transfers for transpose. Opted for a more naive folding, which avoids extra copies. Added a regression test for it, verified against numpy, the BLI SSE2 implementation.
This commit is contained in:
committed by
Sergey Sharybin
parent
baeb314eb7
commit
4cd10ddfdb
@@ -43,13 +43,27 @@ template<> float4x4 float4x4::operator*(const float4x4 &b) const
|
||||
_mm_store_ps(result[i], sum);
|
||||
}
|
||||
#else
|
||||
const float4x4 T = transpose(b);
|
||||
result[0][0] = b[0][0] * a[0][0] + b[0][1] * a[1][0] + b[0][2] * a[2][0] + b[0][3] * a[3][0];
|
||||
result[0][1] = b[0][0] * a[0][1] + b[0][1] * a[1][1] + b[0][2] * a[2][1] + b[0][3] * a[3][1];
|
||||
result[0][2] = b[0][0] * a[0][2] + b[0][1] * a[1][2] + b[0][2] * a[2][2] + b[0][3] * a[3][2];
|
||||
result[0][3] = b[0][0] * a[0][3] + b[0][1] * a[1][3] + b[0][2] * a[2][3] + b[0][3] * a[3][3];
|
||||
|
||||
result.x = float4(dot(a.x, T.x), dot(a.x, T.y), dot(a.x, T.z), dot(a.x, T.w));
|
||||
result.y = float4(dot(a.y, T.x), dot(a.y, T.y), dot(a.y, T.z), dot(a.y, T.w));
|
||||
result.z = float4(dot(a.z, T.x), dot(a.z, T.y), dot(a.z, T.z), dot(a.z, T.w));
|
||||
result.w = float4(dot(a.w, T.x), dot(a.w, T.y), dot(a.w, T.z), dot(a.w, T.w));
|
||||
result[1][0] = b[1][0] * a[0][0] + b[1][1] * a[1][0] + b[1][2] * a[2][0] + b[1][3] * a[3][0];
|
||||
result[1][1] = b[1][0] * a[0][1] + b[1][1] * a[1][1] + b[1][2] * a[2][1] + b[1][3] * a[3][1];
|
||||
result[1][2] = b[1][0] * a[0][2] + b[1][1] * a[1][2] + b[1][2] * a[2][2] + b[1][3] * a[3][2];
|
||||
result[1][3] = b[1][0] * a[0][3] + b[1][1] * a[1][3] + b[1][2] * a[2][3] + b[1][3] * a[3][3];
|
||||
|
||||
result[2][0] = b[2][0] * a[0][0] + b[2][1] * a[1][0] + b[2][2] * a[2][0] + b[2][3] * a[3][0];
|
||||
result[2][1] = b[2][0] * a[0][1] + b[2][1] * a[1][1] + b[2][2] * a[2][1] + b[2][3] * a[3][1];
|
||||
result[2][2] = b[2][0] * a[0][2] + b[2][1] * a[1][2] + b[2][2] * a[2][2] + b[2][3] * a[3][2];
|
||||
result[2][3] = b[2][0] * a[0][3] + b[2][1] * a[1][3] + b[2][2] * a[2][3] + b[2][3] * a[3][3];
|
||||
|
||||
result[3][0] = b[3][0] * a[0][0] + b[3][1] * a[1][0] + b[3][2] * a[2][0] + b[3][3] * a[3][0];
|
||||
result[3][1] = b[3][0] * a[0][1] + b[3][1] * a[1][1] + b[3][2] * a[2][1] + b[3][3] * a[3][1];
|
||||
result[3][2] = b[3][0] * a[0][2] + b[3][1] * a[1][2] + b[3][2] * a[2][2] + b[3][3] * a[3][2];
|
||||
result[3][3] = b[3][0] * a[0][3] + b[3][1] * a[1][3] + b[3][2] * a[2][3] + b[3][3] * a[3][3];
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@@ -328,6 +328,36 @@ TEST(math_matrix, MatrixCompareTest)
|
||||
EXPECT_FALSE(is_negative(m6));
|
||||
}
|
||||
|
||||
TEST(math_matrix, MatrixMultiply)
|
||||
{
|
||||
|
||||
{
|
||||
const float4x4 matrix_a = {
|
||||
{1.0, 2.0, 3.0, 4.0},
|
||||
{5.0, 6.0, 7.0, 8.0},
|
||||
{9.0, 10.0, 11.0, 12.0},
|
||||
{13.0, 14.0, 15.0, 16.0},
|
||||
};
|
||||
const float4x4 matrix_b = {
|
||||
{0.1f, 0.2f, 0.3f, 0.4f},
|
||||
{0.5f, 0.6f, 0.7f, 0.8f},
|
||||
{0.9f, 1.0f, 1.1f, 1.2f},
|
||||
{1.3f, 1.4f, 1.5f, 1.6f},
|
||||
};
|
||||
|
||||
const float4x4 expected = {
|
||||
{9.0f, 10.0f, 11.0f, 12.0f},
|
||||
{20.2f, 22.8f, 25.4f, 28.0f},
|
||||
{31.4f, 35.6f, 39.8f, 44.0f},
|
||||
{42.6f, 48.4f, 54.2f, 60.0f},
|
||||
};
|
||||
|
||||
const float4x4 result = matrix_a * matrix_b;
|
||||
|
||||
EXPECT_M4_NEAR(result, expected, 1e-5f);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(math_matrix, MatrixToNearestEuler)
|
||||
{
|
||||
EulerXYZ eul1 = EulerXYZ(225.08542, -1.12485, -121.23738);
|
||||
|
||||
Reference in New Issue
Block a user