From d5c662a305a7088e0878d863be7d44d75594831e Mon Sep 17 00:00:00 2001
From: Omar Emara
Date: Wed, 19 Feb 2025 13:56:06 +0100
Subject: [PATCH] Fix #134436: Compositor is much slower on Windows

The new CPU compositor in v4.4 is much slower than the old CPU compositor in
v4.3 on Windows. This is because MSVC does not inline many of the core methods
in the Result class of the compositor. To fix this, we force inline those
methods, adding a new macro for inlining methods in the process, since the
existing macro has the static keyword, which only works for functions, not
methods.

Pull Request: https://projects.blender.org/blender/blender/pulls/134748
---
 source/blender/blenlib/BLI_compiler_compat.h | 6 ++
 source/blender/compositor/COM_result.hh | 90 +++++++++++---------
 2 files changed, 55 insertions(+), 41 deletions(-)

diff --git a/source/blender/blenlib/BLI_compiler_compat.h b/source/blender/blenlib/BLI_compiler_compat.h
index 80fb3994ecd..0b6ddac6712 100644
--- a/source/blender/blenlib/BLI_compiler_compat.h
+++ b/source/blender/blenlib/BLI_compiler_compat.h
@@ -37,6 +37,12 @@ template static inline T decltype_helper(T x)
 # define BLI_INLINE static inline __attribute__((always_inline)) __attribute__((__unused__))
 #endif
 
+#if defined(_MSC_VER)
+# define BLI_INLINE_METHOD __forceinline
+#else
+# define BLI_INLINE_METHOD inline __attribute__((always_inline)) __attribute__((__unused__))
+#endif
+
 #if defined(__GNUC__)
 # define BLI_NOINLINE __attribute__((noinline))
 #elif defined(_MSC_VER)
diff --git a/source/blender/compositor/COM_result.hh b/source/blender/compositor/COM_result.hh
index d3c923bbd6a..ecaf7f0ab0d 100644
--- a/source/blender/compositor/COM_result.hh
+++ b/source/blender/compositor/COM_result.hh
@@ -513,12 +513,12 @@ class Result {
 /* Inline Methods.
*/ -inline const Domain &Result::domain() const +BLI_INLINE_METHOD const Domain &Result::domain() const { return domain_; } -inline int64_t Result::channels_count() const +BLI_INLINE_METHOD int64_t Result::channels_count() const { switch (type_) { case ResultType::Float: @@ -536,19 +536,19 @@ inline int64_t Result::channels_count() const return 4; } -inline float *Result::float_texture() const +BLI_INLINE_METHOD float *Result::float_texture() const { BLI_assert(storage_type_ == ResultStorageType::FloatCPU); return float_texture_; } -inline int *Result::integer_texture() const +BLI_INLINE_METHOD int *Result::integer_texture() const { BLI_assert(storage_type_ == ResultStorageType::IntegerCPU); return integer_texture_; } -inline void *Result::data() const +BLI_INLINE_METHOD void *Result::data() const { switch (storage_type_) { case ResultStorageType::FloatCPU: @@ -563,7 +563,7 @@ inline void *Result::data() const return nullptr; } -template inline const T &Result::get_single_value() const +template BLI_INLINE_METHOD const T &Result::get_single_value() const { BLI_assert(this->is_single_value()); static_assert(Result::is_supported_type()); @@ -597,12 +597,13 @@ template inline const T &Result::get_single_value() const } } -template inline T &Result::get_single_value() +template BLI_INLINE_METHOD T &Result::get_single_value() { return const_cast(std::as_const(*this).get_single_value()); } -template inline T Result::get_single_value_default(const T &default_value) const +template +BLI_INLINE_METHOD T Result::get_single_value_default(const T &default_value) const { if (this->is_single_value()) { return this->get_single_value(); @@ -610,7 +611,7 @@ template inline T Result::get_single_value_default(const T &default_ return default_value; } -template inline void Result::set_single_value(const T &value) +template BLI_INLINE_METHOD void Result::set_single_value(const T &value) { BLI_assert(this->is_allocated()); BLI_assert(this->is_single_value()); @@ -660,7 +661,8 @@ 
template inline void Result::set_single_value(const T &value) } } -template inline T Result::load_pixel(const int2 &texel) const +template +BLI_INLINE_METHOD T Result::load_pixel(const int2 &texel) const { if constexpr (CouldBeSingleValue) { if (is_single_value_) { @@ -680,7 +682,7 @@ template inline T Result::load_pixel(const } template -inline T Result::load_pixel_extended(const int2 &texel) const +BLI_INLINE_METHOD T Result::load_pixel_extended(const int2 &texel) const { if constexpr (CouldBeSingleValue) { if (is_single_value_) { @@ -701,7 +703,7 @@ inline T Result::load_pixel_extended(const int2 &texel) const } template -inline T Result::load_pixel_fallback(const int2 &texel, const T &fallback) const +BLI_INLINE_METHOD T Result::load_pixel_fallback(const int2 &texel, const T &fallback) const { if constexpr (CouldBeSingleValue) { if (is_single_value_) { @@ -725,12 +727,12 @@ inline T Result::load_pixel_fallback(const int2 &texel, const T &fallback) const } template -inline T Result::load_pixel_zero(const int2 &texel) const +BLI_INLINE_METHOD T Result::load_pixel_zero(const int2 &texel) const { return this->load_pixel_fallback(texel, T(0)); } -inline float4 Result::load_pixel_generic_type(const int2 &texel) const +BLI_INLINE_METHOD float4 Result::load_pixel_generic_type(const int2 &texel) const { float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f); if (is_single_value_) { @@ -742,7 +744,8 @@ inline float4 Result::load_pixel_generic_type(const int2 &texel) const return pixel_value; } -template inline void Result::store_pixel(const int2 &texel, const T &pixel_value) +template +BLI_INLINE_METHOD void Result::store_pixel(const int2 &texel, const T &pixel_value) { if constexpr (std::is_scalar_v) { *this->get_pixel(texel) = pixel_value; @@ -753,12 +756,13 @@ template inline void Result::store_pixel(const int2 &texel, const T } } -inline void Result::store_pixel_generic_type(const int2 &texel, const float4 &pixel_value) +BLI_INLINE_METHOD void 
Result::store_pixel_generic_type(const int2 &texel, + const float4 &pixel_value) { this->copy_pixel(this->get_float_pixel(texel), pixel_value); } -inline float4 Result::sample_nearest_zero(const float2 &coordinates) const +BLI_INLINE_METHOD float4 Result::sample_nearest_zero(const float2 &coordinates) const { float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f); if (is_single_value_) { @@ -779,9 +783,9 @@ inline float4 Result::sample_nearest_zero(const float2 &coordinates) const return pixel_value; } -inline float4 Result::sample_nearest_wrap(const float2 &coordinates, - bool wrap_x, - bool wrap_y) const +BLI_INLINE_METHOD float4 Result::sample_nearest_wrap(const float2 &coordinates, + bool wrap_x, + bool wrap_y) const { float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f); if (is_single_value_) { @@ -805,9 +809,9 @@ inline float4 Result::sample_nearest_wrap(const float2 &coordinates, return pixel_value; } -inline float4 Result::sample_bilinear_wrap(const float2 &coordinates, - bool wrap_x, - bool wrap_y) const +BLI_INLINE_METHOD float4 Result::sample_bilinear_wrap(const float2 &coordinates, + bool wrap_x, + bool wrap_y) const { float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f); if (is_single_value_) { @@ -831,7 +835,9 @@ inline float4 Result::sample_bilinear_wrap(const float2 &coordinates, return pixel_value; } -inline float4 Result::sample_cubic_wrap(const float2 &coordinates, bool wrap_x, bool wrap_y) const +BLI_INLINE_METHOD float4 Result::sample_cubic_wrap(const float2 &coordinates, + bool wrap_x, + bool wrap_y) const { float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f); if (is_single_value_) { @@ -855,7 +861,7 @@ inline float4 Result::sample_cubic_wrap(const float2 &coordinates, bool wrap_x, return pixel_value; } -inline float4 Result::sample_bilinear_zero(const float2 &coordinates) const +BLI_INLINE_METHOD float4 Result::sample_bilinear_zero(const float2 &coordinates) const { float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f); if (is_single_value_) { @@ 
-876,7 +882,7 @@ inline float4 Result::sample_bilinear_zero(const float2 &coordinates) const return pixel_value; } -inline float4 Result::sample_nearest_extended(const float2 &coordinates) const +BLI_INLINE_METHOD float4 Result::sample_nearest_extended(const float2 &coordinates) const { float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f); if (is_single_value_) { @@ -897,7 +903,7 @@ inline float4 Result::sample_nearest_extended(const float2 &coordinates) const return pixel_value; } -inline float4 Result::sample_bilinear_extended(const float2 &coordinates) const +BLI_INLINE_METHOD float4 Result::sample_bilinear_extended(const float2 &coordinates) const { float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f); if (is_single_value_) { @@ -929,9 +935,9 @@ static void sample_ewa_extended_read_callback(void *userdata, int x, int y, floa copy_v4_v4(result, sampled_result); } -inline float4 Result::sample_ewa_extended(const float2 &coordinates, - const float2 &x_gradient, - const float2 &y_gradient) const +BLI_INLINE_METHOD float4 Result::sample_ewa_extended(const float2 &coordinates, + const float2 &x_gradient, + const float2 &y_gradient) const { BLI_assert(type_ == ResultType::Color); @@ -966,9 +972,9 @@ static void sample_ewa_zero_read_callback(void *userdata, int x, int y, float re copy_v4_v4(result, sampled_result); } -inline float4 Result::sample_ewa_zero(const float2 &coordinates, - const float2 &x_gradient, - const float2 &y_gradient) const +BLI_INLINE_METHOD float4 Result::sample_ewa_zero(const float2 &coordinates, + const float2 &x_gradient, + const float2 &y_gradient) const { BLI_assert(type_ == ResultType::Color); @@ -1017,7 +1023,7 @@ template constexpr bool Result::is_supported_type() return is_same_any_v; } -template inline int64_t Result::get_pixel_index(const int2 &texel) const +template BLI_INLINE_METHOD int64_t Result::get_pixel_index(const int2 &texel) const { BLI_assert(!is_single_value_); BLI_assert(this->is_allocated()); @@ -1030,7 +1036,7 @@ template 
inline int64_t Result::get_pixel_index(const int2 &texel) c return (int64_t(texel.y) * domain_.size.x + texel.x) * channels_count; } -inline int64_t Result::get_pixel_index(const int2 &texel) const +BLI_INLINE_METHOD int64_t Result::get_pixel_index(const int2 &texel) const { BLI_assert(!is_single_value_); BLI_assert(this->is_allocated()); @@ -1039,7 +1045,7 @@ inline int64_t Result::get_pixel_index(const int2 &texel) const } template -inline std::conditional_t(), int, float> *Result::get_pixel( +BLI_INLINE_METHOD std::conditional_t(), int, float> *Result::get_pixel( const int2 &texel) const { if constexpr (Result::is_int_type()) { @@ -1050,19 +1056,21 @@ inline std::conditional_t(), int, float> *Result::get_pix } } -inline float *Result::get_float_pixel(const int2 &texel) const +BLI_INLINE_METHOD float *Result::get_float_pixel(const int2 &texel) const { BLI_assert(storage_type_ == ResultStorageType::FloatCPU); return float_texture_ + this->get_pixel_index(texel); } -inline int *Result::get_integer_pixel(const int2 &texel) const +BLI_INLINE_METHOD int *Result::get_integer_pixel(const int2 &texel) const { BLI_assert(storage_type_ == ResultStorageType::IntegerCPU); return integer_texture_ + this->get_pixel_index(texel); } -inline void Result::copy_pixel(float *target, const float *source, const int channels_count) +BLI_INLINE_METHOD void Result::copy_pixel(float *target, + const float *source, + const int channels_count) { switch (channels_count) { case 1: @@ -1083,7 +1091,7 @@ inline void Result::copy_pixel(float *target, const float *source, const int cha } } -inline void Result::copy_pixel(int *target, const int *source, const int channels_count) +BLI_INLINE_METHOD void Result::copy_pixel(int *target, const int *source, const int channels_count) { switch (channels_count) { case 1: @@ -1098,7 +1106,7 @@ inline void Result::copy_pixel(int *target, const int *source, const int channel } } -inline void Result::copy_pixel(float *target, const float *source) const 
+BLI_INLINE_METHOD void Result::copy_pixel(float *target, const float *source) const { switch (type_) { case ResultType::Float: