From d5c662a305a7088e0878d863be7d44d75594831e Mon Sep 17 00:00:00 2001
From: Omar Emara <mail@OmarEmara.dev>
Date: Wed, 19 Feb 2025 13:56:06 +0100
Subject: [PATCH] Fix #134436: Compositor is much slower on Windows

The new CPU compositor in v4.4 is much slower than the old CPU
compositor in v4.3 on Windows. This is because MSVC does not inline many
of the core methods in the Result class of the compositor. To fix this,
we force inline those methods. This requires a new macro,
BLI_INLINE_METHOD, because the existing BLI_INLINE macro includes the
static keyword, which is valid for free functions but not for
out-of-class method definitions.

Pull Request: https://projects.blender.org/blender/blender/pulls/134748
---
 source/blender/blenlib/BLI_compiler_compat.h |  6 ++
 source/blender/compositor/COM_result.hh      | 90 +++++++++++---------
 2 files changed, 55 insertions(+), 41 deletions(-)
diff --git a/source/blender/blenlib/BLI_compiler_compat.h b/source/blender/blenlib/BLI_compiler_compat.h
index 80fb3994ecd..0b6ddac6712 100644
--- a/source/blender/blenlib/BLI_compiler_compat.h
+++ b/source/blender/blenlib/BLI_compiler_compat.h
@@ -37,6 +37,12 @@ template<typename T> static inline T decltype_helper(T x)
 #  define BLI_INLINE static inline __attribute__((always_inline)) __attribute__((__unused__))
 #endif
 
+#if defined(_MSC_VER)
+#  define BLI_INLINE_METHOD __forceinline
+#else
+#  define BLI_INLINE_METHOD inline __attribute__((always_inline)) __attribute__((__unused__))
+#endif
+
 #if defined(__GNUC__)
 #  define BLI_NOINLINE __attribute__((noinline))
 #elif defined(_MSC_VER)
diff --git a/source/blender/compositor/COM_result.hh b/source/blender/compositor/COM_result.hh
index d3c923bbd6a..ecaf7f0ab0d 100644
--- a/source/blender/compositor/COM_result.hh
+++ b/source/blender/compositor/COM_result.hh
@@ -513,12 +513,12 @@ class Result {
 /* Inline Methods.
  */
 
-inline const Domain &Result::domain() const
+BLI_INLINE_METHOD const Domain &Result::domain() const
 {
   return domain_;
 }
 
-inline int64_t Result::channels_count() const
+BLI_INLINE_METHOD int64_t Result::channels_count() const
 {
   switch (type_) {
     case ResultType::Float:
@@ -536,19 +536,19 @@ inline int64_t Result::channels_count() const
   return 4;
 }
 
-inline float *Result::float_texture() const
+BLI_INLINE_METHOD float *Result::float_texture() const
 {
   BLI_assert(storage_type_ == ResultStorageType::FloatCPU);
   return float_texture_;
 }
 
-inline int *Result::integer_texture() const
+BLI_INLINE_METHOD int *Result::integer_texture() const
 {
   BLI_assert(storage_type_ == ResultStorageType::IntegerCPU);
   return integer_texture_;
 }
 
-inline void *Result::data() const
+BLI_INLINE_METHOD void *Result::data() const
 {
   switch (storage_type_) {
     case ResultStorageType::FloatCPU:
@@ -563,7 +563,7 @@ inline void *Result::data() const
   return nullptr;
 }
 
-template<typename T> inline const T &Result::get_single_value() const
+template<typename T> BLI_INLINE_METHOD const T &Result::get_single_value() const
 {
   BLI_assert(this->is_single_value());
   static_assert(Result::is_supported_type<T>());
@@ -597,12 +597,13 @@ template<typename T> inline const T &Result::get_single_value() const
   }
 }
 
-template<typename T> inline T &Result::get_single_value()
+template<typename T> BLI_INLINE_METHOD T &Result::get_single_value()
 {
   return const_cast<T &>(std::as_const(*this).get_single_value<T>());
 }
 
-template<typename T> inline T Result::get_single_value_default(const T &default_value) const
+template<typename T>
+BLI_INLINE_METHOD T Result::get_single_value_default(const T &default_value) const
 {
   if (this->is_single_value()) {
     return this->get_single_value<T>();
@@ -610,7 +611,7 @@ template<typename T> inline T Result::get_single_value_default(const T &default_
   return default_value;
 }
 
-template<typename T> inline void Result::set_single_value(const T &value)
+template<typename T> BLI_INLINE_METHOD void Result::set_single_value(const T &value)
 {
   BLI_assert(this->is_allocated());
   BLI_assert(this->is_single_value());
@@ -660,7 +661,8 @@ template<typename T> inline void Result::set_single_value(const T &value)
   }
 }
 
-template<typename T, bool CouldBeSingleValue> inline T Result::load_pixel(const int2 &texel) const
+template<typename T, bool CouldBeSingleValue>
+BLI_INLINE_METHOD T Result::load_pixel(const int2 &texel) const
 {
   if constexpr (CouldBeSingleValue) {
     if (is_single_value_) {
@@ -680,7 +682,7 @@ template<typename T, bool CouldBeSingleValue> inline T Result::load_pixel(const
 }
 
 template<typename T, bool CouldBeSingleValue>
-inline T Result::load_pixel_extended(const int2 &texel) const
+BLI_INLINE_METHOD T Result::load_pixel_extended(const int2 &texel) const
 {
   if constexpr (CouldBeSingleValue) {
     if (is_single_value_) {
@@ -701,7 +703,7 @@ inline T Result::load_pixel_extended(const int2 &texel) const
 }
 
 template<typename T, bool CouldBeSingleValue>
-inline T Result::load_pixel_fallback(const int2 &texel, const T &fallback) const
+BLI_INLINE_METHOD T Result::load_pixel_fallback(const int2 &texel, const T &fallback) const
 {
   if constexpr (CouldBeSingleValue) {
     if (is_single_value_) {
@@ -725,12 +727,12 @@ inline T Result::load_pixel_fallback(const int2 &texel, const T &fallback) const
 }
 
 template<typename T, bool CouldBeSingleValue>
-inline T Result::load_pixel_zero(const int2 &texel) const
+BLI_INLINE_METHOD T Result::load_pixel_zero(const int2 &texel) const
 {
   return this->load_pixel_fallback<T, CouldBeSingleValue>(texel, T(0));
 }
 
-inline float4 Result::load_pixel_generic_type(const int2 &texel) const
+BLI_INLINE_METHOD float4 Result::load_pixel_generic_type(const int2 &texel) const
 {
   float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
   if (is_single_value_) {
@@ -742,7 +744,8 @@ inline float4 Result::load_pixel_generic_type(const int2 &texel) const
   return pixel_value;
 }
 
-template<typename T> inline void Result::store_pixel(const int2 &texel, const T &pixel_value)
+template<typename T>
+BLI_INLINE_METHOD void Result::store_pixel(const int2 &texel, const T &pixel_value)
 {
   if constexpr (std::is_scalar_v<T>) {
     *this->get_pixel<T>(texel) = pixel_value;
@@ -753,12 +756,13 @@ template<typename T> inline void Result::store_pixel(const int2 &texel, const T
   }
 }
 
-inline void Result::store_pixel_generic_type(const int2 &texel, const float4 &pixel_value)
+BLI_INLINE_METHOD void Result::store_pixel_generic_type(const int2 &texel,
+                                                        const float4 &pixel_value)
 {
   this->copy_pixel(this->get_float_pixel(texel), pixel_value);
 }
 
-inline float4 Result::sample_nearest_zero(const float2 &coordinates) const
+BLI_INLINE_METHOD float4 Result::sample_nearest_zero(const float2 &coordinates) const
 {
   float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
   if (is_single_value_) {
@@ -779,9 +783,9 @@ inline float4 Result::sample_nearest_zero(const float2 &coordinates) const
   return pixel_value;
 }
 
-inline float4 Result::sample_nearest_wrap(const float2 &coordinates,
-                                          bool wrap_x,
-                                          bool wrap_y) const
+BLI_INLINE_METHOD float4 Result::sample_nearest_wrap(const float2 &coordinates,
+                                                     bool wrap_x,
+                                                     bool wrap_y) const
 {
   float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
   if (is_single_value_) {
@@ -805,9 +809,9 @@ inline float4 Result::sample_nearest_wrap(const float2 &coordinates,
   return pixel_value;
 }
 
-inline float4 Result::sample_bilinear_wrap(const float2 &coordinates,
-                                           bool wrap_x,
-                                           bool wrap_y) const
+BLI_INLINE_METHOD float4 Result::sample_bilinear_wrap(const float2 &coordinates,
+                                                      bool wrap_x,
+                                                      bool wrap_y) const
 {
   float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
   if (is_single_value_) {
@@ -831,7 +835,9 @@ inline float4 Result::sample_bilinear_wrap(const float2 &coordinates,
   return pixel_value;
 }
 
-inline float4 Result::sample_cubic_wrap(const float2 &coordinates, bool wrap_x, bool wrap_y) const
+BLI_INLINE_METHOD float4 Result::sample_cubic_wrap(const float2 &coordinates,
+                                                   bool wrap_x,
+                                                   bool wrap_y) const
 {
   float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
   if (is_single_value_) {
@@ -855,7 +861,7 @@ inline float4 Result::sample_cubic_wrap(const float2 &coordinates, bool wrap_x,
   return pixel_value;
 }
 
-inline float4 Result::sample_bilinear_zero(const float2 &coordinates) const
+BLI_INLINE_METHOD float4 Result::sample_bilinear_zero(const float2 &coordinates) const
 {
   float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
   if (is_single_value_) {
@@ -876,7 +882,7 @@ inline float4 Result::sample_bilinear_zero(const float2 &coordinates) const
   return pixel_value;
 }
 
-inline float4 Result::sample_nearest_extended(const float2 &coordinates) const
+BLI_INLINE_METHOD float4 Result::sample_nearest_extended(const float2 &coordinates) const
 {
   float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
   if (is_single_value_) {
@@ -897,7 +903,7 @@ inline float4 Result::sample_nearest_extended(const float2 &coordinates) const
   return pixel_value;
 }
 
-inline float4 Result::sample_bilinear_extended(const float2 &coordinates) const
+BLI_INLINE_METHOD float4 Result::sample_bilinear_extended(const float2 &coordinates) const
 {
   float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
   if (is_single_value_) {
@@ -929,9 +935,9 @@ static void sample_ewa_extended_read_callback(void *userdata, int x, int y, floa
   copy_v4_v4(result, sampled_result);
 }
 
-inline float4 Result::sample_ewa_extended(const float2 &coordinates,
-                                          const float2 &x_gradient,
-                                          const float2 &y_gradient) const
+BLI_INLINE_METHOD float4 Result::sample_ewa_extended(const float2 &coordinates,
+                                                     const float2 &x_gradient,
+                                                     const float2 &y_gradient) const
 {
   BLI_assert(type_ == ResultType::Color);
 
@@ -966,9 +972,9 @@ static void sample_ewa_zero_read_callback(void *userdata, int x, int y, float re
   copy_v4_v4(result, sampled_result);
 }
 
-inline float4 Result::sample_ewa_zero(const float2 &coordinates,
-                                      const float2 &x_gradient,
-                                      const float2 &y_gradient) const
+BLI_INLINE_METHOD float4 Result::sample_ewa_zero(const float2 &coordinates,
+                                                 const float2 &x_gradient,
+                                                 const float2 &y_gradient) const
 {
   BLI_assert(type_ == ResultType::Color);
 
@@ -1017,7 +1023,7 @@ template<typename T> constexpr bool Result::is_supported_type()
   return is_same_any_v<T, float, int, float2, float3, float4, int2>;
 }
 
-template<typename T> inline int64_t Result::get_pixel_index(const int2 &texel) const
+template<typename T> BLI_INLINE_METHOD int64_t Result::get_pixel_index(const int2 &texel) const
 {
   BLI_assert(!is_single_value_);
   BLI_assert(this->is_allocated());
@@ -1030,7 +1036,7 @@ template<typename T> inline int64_t Result::get_pixel_index(const int2 &texel) c
   return (int64_t(texel.y) * domain_.size.x + texel.x) * channels_count;
 }
 
-inline int64_t Result::get_pixel_index(const int2 &texel) const
+BLI_INLINE_METHOD int64_t Result::get_pixel_index(const int2 &texel) const
 {
   BLI_assert(!is_single_value_);
   BLI_assert(this->is_allocated());
@@ -1039,7 +1045,7 @@ inline int64_t Result::get_pixel_index(const int2 &texel) const
 }
 
 template<typename T>
-inline std::conditional_t<Result::is_int_type<T>(), int, float> *Result::get_pixel(
+BLI_INLINE_METHOD std::conditional_t<Result::is_int_type<T>(), int, float> *Result::get_pixel(
     const int2 &texel) const
 {
   if constexpr (Result::is_int_type<T>()) {
@@ -1050,19 +1056,21 @@ inline std::conditional_t<Result::is_int_type<T>(), int, float> *Result::get_pix
   }
 }
 
-inline float *Result::get_float_pixel(const int2 &texel) const
+BLI_INLINE_METHOD float *Result::get_float_pixel(const int2 &texel) const
 {
   BLI_assert(storage_type_ == ResultStorageType::FloatCPU);
   return float_texture_ + this->get_pixel_index(texel);
 }
 
-inline int *Result::get_integer_pixel(const int2 &texel) const
+BLI_INLINE_METHOD int *Result::get_integer_pixel(const int2 &texel) const
 {
   BLI_assert(storage_type_ == ResultStorageType::IntegerCPU);
   return integer_texture_ + this->get_pixel_index(texel);
 }
 
-inline void Result::copy_pixel(float *target, const float *source, const int channels_count)
+BLI_INLINE_METHOD void Result::copy_pixel(float *target,
+                                          const float *source,
+                                          const int channels_count)
 {
   switch (channels_count) {
     case 1:
@@ -1083,7 +1091,7 @@ inline void Result::copy_pixel(float *target, const float *source, const int cha
   }
 }
 
-inline void Result::copy_pixel(int *target, const int *source, const int channels_count)
+BLI_INLINE_METHOD void Result::copy_pixel(int *target, const int *source, const int channels_count)
 {
   switch (channels_count) {
     case 1:
@@ -1098,7 +1106,7 @@ inline void Result::copy_pixel(int *target, const int *source, const int channel
   }
 }
 
-inline void Result::copy_pixel(float *target, const float *source) const
+BLI_INLINE_METHOD void Result::copy_pixel(float *target, const float *source) const
 {
   switch (type_) {
     case ResultType::Float: