From 6769acbbba7f0beb50dafcae3f49de9caee55787 Mon Sep 17 00:00:00 2001 From: Chris Blackbourn Date: Tue, 17 Jan 2023 11:14:52 +1300 Subject: [PATCH] BLI_math: simplify matrix multiply logic Improve safety and correctness of matrix multiplication by using temporary storage if one of the inputs is also the output. No functional changes. Differential Revision: https://developer.blender.org/D16876 Reviewed By: Campbell Barton, Sergey Sharybin --- source/blender/blenlib/BLI_math_matrix.h | 6 +- source/blender/blenlib/intern/math_matrix.c | 228 +++++++----------- .../intern/lineart/lineart_cpu.cc | 6 +- .../intern/lineart/lineart_shadow.c | 2 +- 4 files changed, 94 insertions(+), 148 deletions(-) diff --git a/source/blender/blenlib/BLI_math_matrix.h b/source/blender/blenlib/BLI_math_matrix.h index 538474f58b6..1278bc90e44 100644 --- a/source/blender/blenlib/BLI_math_matrix.h +++ b/source/blender/blenlib/BLI_math_matrix.h @@ -83,16 +83,12 @@ void mul_m3_m4m4(float R[3][3], const float A[4][4], const float B[4][4]); /** * Special matrix multiplies - * - uniq: `R <-- AB`, R is neither A nor B * - pre: `R <-- AR` * - post: `R <-- RB`. */ -void mul_m3_m3m3_uniq(float R[3][3], const float A[3][3], const float B[3][3]); void mul_m3_m3_pre(float R[3][3], const float A[3][3]); void mul_m3_m3_post(float R[3][3], const float B[3][3]); -void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4]); -void mul_m4_m4m4_db_uniq(double R[4][4], const double A[4][4], const double B[4][4]); -void mul_m4db_m4db_m4fl_uniq(double R[4][4], const double A[4][4], const float B[4][4]); +void mul_m4db_m4db_m4fl(double R[4][4], const double A[4][4], const float B[4][4]); void mul_m4_m4_pre(float R[4][4], const float A[4][4]); void mul_m4_m4_post(float R[4][4], const float B[4][4]); diff --git a/source/blender/blenlib/intern/math_matrix.c b/source/blender/blenlib/intern/math_matrix.c index d997eae26fb..7322a9facec 100644 --- a/source/blender/blenlib/intern/math_matrix.c +++ b/source/blender/blenlib/intern/math_matrix.c @@ -257,22 +257,14 @@ void shuffle_m4(float R[4][4], const int index[4]) void mul_m4_m4m4(float R[4][4], const float A[4][4], const float B[4][4]) { - if (A == R) { - mul_m4_m4_post(R, B); + if (R == A || R == B) { + float T[4][4]; + mul_m4_m4m4(T, A, B); + copy_m4_m4(R, T); + return; } - else if (B == R) { - mul_m4_m4_pre(R, A); - } - else { - mul_m4_m4m4_uniq(R, A, B); - } -} -void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4]) -{ - BLI_assert(!ELEM(R, A, B)); - - /* Matrix product: `R[j][k] = A[j][i] . B[i][k]`. */ + /* Matrix product: `R[j][k] = B[j][i] . A[i][k]`. */ #ifdef BLI_HAVE_SSE2 __m128 A0 = _mm_loadu_ps(A[0]); __m128 A1 = _mm_loadu_ps(A[1]); @@ -313,39 +305,16 @@ void mul_m4_m4m4_uniq(float R[4][4], const float A[4][4], const float B[4][4]) #endif } -void mul_m4_m4m4_db_uniq(double R[4][4], const double A[4][4], const double B[4][4]) +void mul_m4db_m4db_m4fl(double R[4][4], const double A[4][4], const float B[4][4]) { - BLI_assert(!ELEM(R, A, B)); + if (R == A) { + double T[4][4]; + mul_m4db_m4db_m4fl(T, A, B); + copy_m4_m4_db(R, T); + return; + } - /* Matrix product: `R[j][k] = A[j][i] . B[i][k]`. */ - - R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0] + B[0][3] * A[3][0]; - R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1] + B[0][3] * A[3][1]; - R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2] + B[0][3] * A[3][2]; - R[0][3] = B[0][0] * A[0][3] + B[0][1] * A[1][3] + B[0][2] * A[2][3] + B[0][3] * A[3][3]; - - R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0] + B[1][3] * A[3][0]; - R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1] + B[1][3] * A[3][1]; - R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2] + B[1][3] * A[3][2]; - R[1][3] = B[1][0] * A[0][3] + B[1][1] * A[1][3] + B[1][2] * A[2][3] + B[1][3] * A[3][3]; - - R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0] + B[2][3] * A[3][0]; - R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1] + B[2][3] * A[3][1]; - R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2] + B[2][3] * A[3][2]; - R[2][3] = B[2][0] * A[0][3] + B[2][1] * A[1][3] + B[2][2] * A[2][3] + B[2][3] * A[3][3]; - - R[3][0] = B[3][0] * A[0][0] + B[3][1] * A[1][0] + B[3][2] * A[2][0] + B[3][3] * A[3][0]; - R[3][1] = B[3][0] * A[0][1] + B[3][1] * A[1][1] + B[3][2] * A[2][1] + B[3][3] * A[3][1]; - R[3][2] = B[3][0] * A[0][2] + B[3][1] * A[1][2] + B[3][2] * A[2][2] + B[3][3] * A[3][2]; - R[3][3] = B[3][0] * A[0][3] + B[3][1] * A[1][3] + B[3][2] * A[2][3] + B[3][3] * A[3][3]; -} - -void mul_m4db_m4db_m4fl_uniq(double R[4][4], const double A[4][4], const float B[4][4]) -{ - /* Remove second check since types don't match. */ - BLI_assert(!ELEM(R, A /*, B */)); - - /* Matrix product: `R[j][k] = A[j][i] . B[i][k]`. */ + /* Matrix product: `R[j][k] = B[j][i] . A[i][k]`. */ R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0] + B[0][3] * A[3][0]; R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1] + B[0][3] * A[3][1]; @@ -370,53 +339,32 @@ void mul_m4db_m4db_m4fl_uniq(double R[4][4], const double A[4][4], const float B void mul_m4_m4_pre(float R[4][4], const float A[4][4]) { - BLI_assert(A != R); - float B[4][4]; - copy_m4_m4(B, R); - mul_m4_m4m4_uniq(R, A, B); + mul_m4_m4m4(R, A, R); } void mul_m4_m4_post(float R[4][4], const float B[4][4]) { - BLI_assert(B != R); - float A[4][4]; - copy_m4_m4(A, R); - mul_m4_m4m4_uniq(R, A, B); -} - -void mul_m3_m3m3(float R[3][3], const float A[3][3], const float B[3][3]) -{ - if (A == R) { - mul_m3_m3_post(R, B); - } - else if (B == R) { - mul_m3_m3_pre(R, A); - } - else { - mul_m3_m3m3_uniq(R, A, B); - } + mul_m4_m4m4(R, R, B); } void mul_m3_m3_pre(float R[3][3], const float A[3][3]) { - BLI_assert(A != R); - float B[3][3]; - copy_m3_m3(B, R); - mul_m3_m3m3_uniq(R, A, B); + mul_m3_m3m3(R, A, R); } void mul_m3_m3_post(float R[3][3], const float B[3][3]) { - BLI_assert(B != R); - float A[3][3]; - copy_m3_m3(A, R); - mul_m3_m3m3_uniq(R, A, B); + mul_m3_m3m3(R, R, B); } -void mul_m3_m3m3_uniq(float R[3][3], const float A[3][3], const float B[3][3]) +void mul_m3_m3m3(float R[3][3], const float A[3][3], const float B[3][3]) { - BLI_assert(!ELEM(R, A, B)); - + if (R == A || R == B) { + float T[3][3]; + mul_m3_m3m3(T, A, B); + copy_m3_m3(R, T); + return; + } R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0]; R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1]; R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2]; @@ -432,88 +380,90 @@ void mul_m3_m3m3_uniq(float R[3][3], const float A[3][3], const float B[3][3]) void mul_m4_m4m3(float R[4][4], const float A[4][4], const float B[3][3]) { - float B_[3][3], A_[4][4]; + if (R == A) { + float T[4][4]; + mul_m4_m4m3(T, A, B); + copy_m4_m4(R, T); + return; + } - /* copy so it works when R is the same pointer as A or B */ - /* TODO: avoid copying when matrices are different */ - copy_m4_m4(A_, A); - copy_m3_m3(B_, B); - - R[0][0] = B_[0][0] * A_[0][0] + B_[0][1] * A_[1][0] + B_[0][2] * A_[2][0]; - R[0][1] = B_[0][0] * A_[0][1] + B_[0][1] * A_[1][1] + B_[0][2] * A_[2][1]; - R[0][2] = B_[0][0] * A_[0][2] + B_[0][1] * A_[1][2] + B_[0][2] * A_[2][2]; - R[1][0] = B_[1][0] * A_[0][0] + B_[1][1] * A_[1][0] + B_[1][2] * A_[2][0]; - R[1][1] = B_[1][0] * A_[0][1] + B_[1][1] * A_[1][1] + B_[1][2] * A_[2][1]; - R[1][2] = B_[1][0] * A_[0][2] + B_[1][1] * A_[1][2] + B_[1][2] * A_[2][2]; - R[2][0] = B_[2][0] * A_[0][0] + B_[2][1] * A_[1][0] + B_[2][2] * A_[2][0]; - R[2][1] = B_[2][0] * A_[0][1] + B_[2][1] * A_[1][1] + B_[2][2] * A_[2][1]; - R[2][2] = B_[2][0] * A_[0][2] + B_[2][1] * A_[1][2] + B_[2][2] * A_[2][2]; + R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0]; + R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1]; + R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2]; + R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0]; + R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1]; + R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2]; + R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0]; + R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1]; + R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2]; } void mul_m3_m3m4(float R[3][3], const float A[3][3], const float B[4][4]) { - float B_[4][4], A_[3][3]; + if (R == A) { + float T[3][3]; + mul_m3_m3m4(T, A, B); + copy_m3_m3(R, T); + return; + } - /* copy so it works when R is the same pointer as A or B */ - /* TODO: avoid copying when matrices are different */ - copy_m3_m3(A_, A); - copy_m4_m4(B_, B); + /* Matrix product: `R[j][k] = B[j][i] . A[i][k]`. */ - /* R[i][j] = B_[i][k] * A_[k][j] */ - R[0][0] = B_[0][0] * A_[0][0] + B_[0][1] * A_[1][0] + B_[0][2] * A_[2][0]; - R[0][1] = B_[0][0] * A_[0][1] + B_[0][1] * A_[1][1] + B_[0][2] * A_[2][1]; - R[0][2] = B_[0][0] * A_[0][2] + B_[0][1] * A_[1][2] + B_[0][2] * A_[2][2]; + R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0]; + R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1]; + R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2]; - R[1][0] = B_[1][0] * A_[0][0] + B_[1][1] * A_[1][0] + B_[1][2] * A_[2][0]; - R[1][1] = B_[1][0] * A_[0][1] + B_[1][1] * A_[1][1] + B_[1][2] * A_[2][1]; - R[1][2] = B_[1][0] * A_[0][2] + B_[1][1] * A_[1][2] + B_[1][2] * A_[2][2]; + R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0]; + R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1]; + R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2]; - R[2][0] = B_[2][0] * A_[0][0] + B_[2][1] * A_[1][0] + B_[2][2] * A_[2][0]; - R[2][1] = B_[2][0] * A_[0][1] + B_[2][1] * A_[1][1] + B_[2][2] * A_[2][1]; - R[2][2] = B_[2][0] * A_[0][2] + B_[2][1] * A_[1][2] + B_[2][2] * A_[2][2]; + R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0]; + R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1]; + R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2]; } void mul_m3_m4m3(float R[3][3], const float A[4][4], const float B[3][3]) { - float B_[3][3], A_[4][4]; + if (R == B) { + float T[3][3]; + mul_m3_m4m3(T, A, B); + copy_m3_m3(R, T); + return; + } - /* copy so it works when R is the same pointer as A or B */ - /* TODO: avoid copying when matrices are different */ - copy_m4_m4(A_, A); - copy_m3_m3(B_, B); + /* Matrix product: `R[j][k] = B[j][i] . A[i][k]`. */ - /* R[i][j] = B[i][k] * A[k][j] */ - R[0][0] = B_[0][0] * A_[0][0] + B_[0][1] * A_[1][0] + B_[0][2] * A_[2][0]; - R[0][1] = B_[0][0] * A_[0][1] + B_[0][1] * A_[1][1] + B_[0][2] * A_[2][1]; - R[0][2] = B_[0][0] * A_[0][2] + B_[0][1] * A_[1][2] + B_[0][2] * A_[2][2]; + R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0]; + R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1]; + R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2]; - R[1][0] = B_[1][0] * A_[0][0] + B_[1][1] * A_[1][0] + B_[1][2] * A_[2][0]; - R[1][1] = B_[1][0] * A_[0][1] + B_[1][1] * A_[1][1] + B_[1][2] * A_[2][1]; - R[1][2] = B_[1][0] * A_[0][2] + B_[1][1] * A_[1][2] + B_[1][2] * A_[2][2]; + R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0]; + R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1]; + R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2]; - R[2][0] = B_[2][0] * A_[0][0] + B_[2][1] * A_[1][0] + B_[2][2] * A_[2][0]; - R[2][1] = B_[2][0] * A_[0][1] + B_[2][1] * A_[1][1] + B_[2][2] * A_[2][1]; - R[2][2] = B_[2][0] * A_[0][2] + B_[2][1] * A_[1][2] + B_[2][2] * A_[2][2]; + R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0]; + R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1]; + R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2]; } void mul_m4_m3m4(float R[4][4], const float A[3][3], const float B[4][4]) { - float B_[4][4], A_[3][3]; + if (R == B) { + float T[4][4]; + mul_m4_m3m4(T, A, B); + copy_m4_m4(R, T); + return; + } - /* copy so it works when R is the same pointer as A or B */ - /* TODO: avoid copying when matrices are different */ - copy_m3_m3(A_, A); - copy_m4_m4(B_, B); - - R[0][0] = B_[0][0] * A_[0][0] + B_[0][1] * A_[1][0] + B_[0][2] * A_[2][0]; - R[0][1] = B_[0][0] * A_[0][1] + B_[0][1] * A_[1][1] + B_[0][2] * A_[2][1]; - R[0][2] = B_[0][0] * A_[0][2] + B_[0][1] * A_[1][2] + B_[0][2] * A_[2][2]; - R[1][0] = B_[1][0] * A_[0][0] + B_[1][1] * A_[1][0] + B_[1][2] * A_[2][0]; - R[1][1] = B_[1][0] * A_[0][1] + B_[1][1] * A_[1][1] + B_[1][2] * A_[2][1]; - R[1][2] = B_[1][0] * A_[0][2] + B_[1][1] * A_[1][2] + B_[1][2] * A_[2][2]; - R[2][0] = B_[2][0] * A_[0][0] + B_[2][1] * A_[1][0] + B_[2][2] * A_[2][0]; - R[2][1] = B_[2][0] * A_[0][1] + B_[2][1] * A_[1][1] + B_[2][2] * A_[2][1]; - R[2][2] = B_[2][0] * A_[0][2] + B_[2][1] * A_[1][2] + B_[2][2] * A_[2][2]; + R[0][0] = B[0][0] * A[0][0] + B[0][1] * A[1][0] + B[0][2] * A[2][0]; + R[0][1] = B[0][0] * A[0][1] + B[0][1] * A[1][1] + B[0][2] * A[2][1]; + R[0][2] = B[0][0] * A[0][2] + B[0][1] * A[1][2] + B[0][2] * A[2][2]; + R[1][0] = B[1][0] * A[0][0] + B[1][1] * A[1][0] + B[1][2] * A[2][0]; + R[1][1] = B[1][0] * A[0][1] + B[1][1] * A[1][1] + B[1][2] * A[2][1]; + R[1][2] = B[1][0] * A[0][2] + B[1][1] * A[1][2] + B[1][2] * A[2][2]; + R[2][0] = B[2][0] * A[0][0] + B[2][1] * A[1][0] + B[2][2] * A[2][0]; + R[2][1] = B[2][0] * A[0][1] + B[2][1] * A[1][1] + B[2][2] * A[2][1]; + R[2][2] = B[2][0] * A[0][2] + B[2][1] * A[1][2] + B[2][2] * A[2][2]; } void mul_m3_m4m4(float R[3][3], const float A[4][4], const float B[4][4]) @@ -1304,7 +1254,7 @@ void mul_m4_m4m4_aligned_scale(float R[4][4], const float A[4][4], const float B mat4_to_loc_rot_size(loc_b, rot_b, size_b, B); mul_v3_m4v3(loc_r, A, loc_b); - mul_m3_m3m3_uniq(rot_r, rot_a, rot_b); + mul_m3_m3m3(rot_r, rot_a, rot_b); mul_v3_v3v3(size_r, size_a, size_b); loc_rot_size_to_mat4(R, loc_r, rot_r, size_r); @@ -1320,7 +1270,7 @@ void mul_m4_m4m4_split_channels(float R[4][4], const float A[4][4], const float mat4_to_loc_rot_size(loc_b, rot_b, size_b, B); add_v3_v3v3(loc_r, loc_a, loc_b); - mul_m3_m3m3_uniq(rot_r, rot_a, rot_b); + mul_m3_m3m3(rot_r, rot_a, rot_b); mul_v3_v3v3(size_r, size_a, size_b); loc_rot_size_to_mat4(R, loc_r, rot_r, size_r); diff --git a/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.cc b/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.cc index 60279e687cc..695452f43ed 100644 --- a/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.cc +++ b/source/blender/gpencil_modifiers/intern/lineart/lineart_cpu.cc @@ -2447,8 +2447,8 @@ static void lineart_object_load_single_instance(LineartData *ld, /* Prepare the matrix used for transforming this specific object (instance). This has to be * done before mesh boundbox check because the function needs that. */ - mul_m4db_m4db_m4fl_uniq(obi->model_view_proj, ld->conf.view_projection, use_mat); - mul_m4db_m4db_m4fl_uniq(obi->model_view, ld->conf.view, use_mat); + mul_m4db_m4db_m4fl(obi->model_view_proj, ld->conf.view_projection, use_mat); + mul_m4db_m4db_m4fl(obi->model_view, ld->conf.view, use_mat); if (!ELEM(ob->type, OB_MESH, OB_MBALL, OB_CURVES_LEGACY, OB_SURF, OB_FONT)) { return; @@ -2524,7 +2524,7 @@ void lineart_main_load_geometries(Depsgraph *depsgraph, } invert_m4_m4(inv, ld->conf.cam_obmat); - mul_m4db_m4db_m4fl_uniq(result, proj, inv); + mul_m4db_m4db_m4fl(result, proj, inv); copy_m4_m4_db(proj, result); copy_m4_m4_db(ld->conf.view_projection, proj); diff --git a/source/blender/gpencil_modifiers/intern/lineart/lineart_shadow.c b/source/blender/gpencil_modifiers/intern/lineart/lineart_shadow.c index 9d825a4e993..19bc69b5566 100644 --- a/source/blender/gpencil_modifiers/intern/lineart/lineart_shadow.c +++ b/source/blender/gpencil_modifiers/intern/lineart/lineart_shadow.c @@ -1226,7 +1226,7 @@ bool lineart_main_try_generate_shadow(Depsgraph *depsgraph, proj, -ld->w, ld->w, -ld->h, ld->h, ld->conf.near_clip, ld->conf.far_clip); } invert_m4_m4(inv, ld->conf.cam_obmat); - mul_m4db_m4db_m4fl_uniq(result, proj, inv); + mul_m4db_m4db_m4fl(result, proj, inv); copy_m4_m4_db(proj, result); copy_m4_m4_db(ld->conf.view_projection, proj); unit_m4_db(view);