2025-02-14 11:48:37 +00:00
|
|
|
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
|
|
|
index 9b20aa043..8f33d3a3f 100755
|
|
|
|
|
--- a/CMakeLists.txt
|
|
|
|
|
+++ b/CMakeLists.txt
|
|
|
|
|
@@ -180,7 +180,12 @@ option(OCIO_USE_OIIO_FOR_APPS "Request OIIO to build apps (ociolutimage, ociocon
|
2025-05-29 14:41:20 -04:00
|
|
|
|
|
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
if (NOT APPLE)
|
|
|
|
|
- if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(AMD64|IA64|EM64T|x86_64|X86|i386|i686)")
|
|
|
|
|
+ if("${CMAKE_GENERATOR_PLATFORM}" MATCHES "(ARM64|arm64)" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
|
|
|
|
|
+ set(OCIO_ARCH_X86 0)
|
|
|
|
|
+ set(OCIO_BUILD_ENABLE_OPTIMIZATIONS_SSE ON)
|
|
|
|
|
+ set(OCIO_BUILD_ENABLE_OPTIMIZATIONS_AVX OFF)
|
|
|
|
|
+ set(OCIO_BUILD_ENABLE_OPTIMIZATIONS_F16C OFF)
|
|
|
|
|
+ elseif ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(AMD64|IA64|EM64T|x86_64|X86|i386|i686)")
|
|
|
|
|
# Intel-based architecture (not APPLE)
|
|
|
|
|
if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "(X86|i386|i686)")
|
|
|
|
|
set(OCIO_ARCH_X86_32 1)
|
|
|
|
|
@@ -270,7 +275,7 @@ option(OCIO_USE_AVX2 "Specify whether to enable AVX2 CPU performance optimizatio
|
|
|
|
|
option(OCIO_USE_AVX512 "Specify whether to enable AVX512 CPU performance optimizations" ${OCIO_BUILD_ENABLE_OPTIMIZATIONS_AVX})
|
|
|
|
|
option(OCIO_USE_F16C "Specify whether to enable F16C CPU performance optimizations" ${OCIO_BUILD_ENABLE_OPTIMIZATIONS_F16C})
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
-if (APPLE)
|
|
|
|
|
+if (APPLE OR WIN32)
|
|
|
|
|
# TODO: Revisit whether that option is necessary.
|
|
|
|
|
option(OCIO_USE_SSE2NEON "Specify whether to enable SSE CPU performance optimizations using SSE2NEON for Apple ARM architecture" ON)
|
|
|
|
|
mark_as_advanced(OCIO_USE_SSE2NEON)
|
|
|
|
|
@@ -332,8 +337,10 @@ if(OCIO_USE_SIMD AND OCIO_USE_SSE2NEON AND COMPILER_SUPPORTS_ARM_NEON)
|
|
|
|
|
add_library(sse2neon INTERFACE)
|
|
|
|
|
# Add the include directories to the target.
|
|
|
|
|
target_include_directories(sse2neon INTERFACE "${sse2neon_INCLUDE_DIR}")
|
|
|
|
|
- # Ignore the warnings coming from sse2neon.h as they are false positives.
|
|
|
|
|
- target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
|
|
|
|
|
+ if(NOT MSVC)
|
|
|
|
|
+ # Ignore the warnings coming from sse2neon.h as they are false positives.
|
|
|
|
|
+ target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
|
|
|
|
|
+ endif()
|
|
|
|
|
endif()
|
|
|
|
|
endif()
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
diff --git a/share/cmake/modules/install/Installsse2neon.cmake b/share/cmake/modules/install/Installsse2neon.cmake
|
|
|
|
|
index 47877436a..ced4ae139 100644
|
|
|
|
|
--- a/share/cmake/modules/install/Installsse2neon.cmake
|
|
|
|
|
+++ b/share/cmake/modules/install/Installsse2neon.cmake
|
|
|
|
|
@@ -16,7 +16,7 @@ include(FetchContent)
|
|
|
|
|
set(FETCHCONTENT_BASE_DIR "${CMAKE_BINARY_DIR}/ext/build/sse2neon")
|
|
|
|
|
FetchContent_Declare(sse2neon
|
|
|
|
|
GIT_REPOSITORY https://github.com/DLTcollab/sse2neon.git
|
|
|
|
|
- GIT_TAG v1.6.0
|
|
|
|
|
+ GIT_TAG 227cc413fb2d50b2a10073087be96b59d5364aea
|
|
|
|
|
)
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
# FetchContent_MakeAvailable is not available until CMake 3.14+.
|
|
|
|
|
@@ -38,6 +38,8 @@ if(NOT sse2neon_POPULATED)
|
|
|
|
|
add_library(sse2neon INTERFACE)
|
|
|
|
|
# Add the include directories to the target.
|
|
|
|
|
target_include_directories(sse2neon INTERFACE "${sse2neon_INCLUDE_DIR}")
|
|
|
|
|
- # Ignore the warnings coming from sse2neon.h as they are false positives.
|
|
|
|
|
- target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
|
|
|
|
|
+ if(NOT MSVC)
|
|
|
|
|
+ # Ignore the warnings coming from sse2neon.h as they are false positives.
|
|
|
|
|
+ target_compile_options(sse2neon INTERFACE -Wno-unused-parameter)
|
|
|
|
|
+ endif()
|
|
|
|
|
endif()
|
|
|
|
|
\ No newline at end of file
|
|
|
|
|
diff --git a/share/cmake/utils/CheckSupportSSEUsingSSE2NEON.cmake b/share/cmake/utils/CheckSupportSSEUsingSSE2NEON.cmake
|
|
|
|
|
index c47c8be70..d24cda55f 100644
|
|
|
|
|
--- a/share/cmake/utils/CheckSupportSSEUsingSSE2NEON.cmake
|
|
|
|
|
+++ b/share/cmake/utils/CheckSupportSSEUsingSSE2NEON.cmake
|
|
|
|
|
@@ -6,8 +6,13 @@ include(CheckCXXSourceCompiles)
|
|
|
|
|
set(_cmake_required_flags_orig "${CMAKE_REQUIRED_FLAGS}")
|
|
|
|
|
set(_cmake_required_includes_orig "${CMAKE_REQUIRED_INCLUDES}")
|
|
|
|
|
set(_cmake_osx_architectures_orig "${CMAKE_OSX_ARCHITECTURES}")
|
|
|
|
|
+set(_cmake_cxx_flags_orig "${CMAKE_CXX_FLAGS}")
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
-if(APPLE AND COMPILER_SUPPORTS_ARM_NEON)
|
|
|
|
|
+if(MSVC)
|
|
|
|
|
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zc:preprocessor") # Append so the check keeps the existing flags.
|
|
|
|
|
+endif()
|
|
|
|
|
+
|
|
|
|
|
+if((APPLE OR WIN32) AND COMPILER_SUPPORTS_ARM_NEON)
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
if("${CMAKE_OSX_ARCHITECTURES}" MATCHES "arm64;x86_64" OR
|
|
|
|
|
"${CMAKE_OSX_ARCHITECTURES}" MATCHES "x86_64;arm64")
|
|
|
|
|
@@ -63,8 +68,9 @@ endif()
|
|
|
|
|
set(CMAKE_REQUIRED_FLAGS "${_cmake_required_flags_orig}")
|
|
|
|
|
set(CMAKE_REQUIRED_INCLUDES "${_cmake_required_includes_orig}")
|
|
|
|
|
set(CMAKE_OSX_ARCHITECTURES "${_cmake_osx_architectures_orig}")
|
|
|
|
|
+set(CMAKE_CXX_FLAGS "${_cmake_cxx_flags_orig}")
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
unset(_cmake_required_flags_orig)
|
|
|
|
|
unset(_cmake_required_includes_orig)
|
|
|
|
|
unset(_cmake_osx_architectures_orig)
|
|
|
|
|
-
|
|
|
|
|
+unset(_cmake_cxx_flags_orig)
|
|
|
|
|
diff --git a/share/cmake/utils/CompilerFlags.cmake b/share/cmake/utils/CompilerFlags.cmake
|
|
|
|
|
index 536b5eebd..d87e73f2d 100644
|
|
|
|
|
--- a/share/cmake/utils/CompilerFlags.cmake
|
|
|
|
|
+++ b/share/cmake/utils/CompilerFlags.cmake
|
|
|
|
|
@@ -18,7 +18,12 @@ if(OCIO_USE_SIMD)
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
if (OCIO_USE_SSE2NEON AND COMPILER_SUPPORTS_ARM_NEON)
|
|
|
|
|
include(CheckSupportSSEUsingSSE2NEON)
|
|
|
|
|
- if(NOT COMPILER_SUPPORTS_SSE_WITH_SSE2NEON)
|
|
|
|
|
+ if(COMPILER_SUPPORTS_SSE_WITH_SSE2NEON)
|
|
|
|
|
+ if(WIN32 AND MSVC)
|
|
|
|
|
+ # Enable the "new" preprocessor, to more closely match Clang/GCC, required for sse2neon
|
|
|
|
|
+ set(PLATFORM_COMPILE_OPTIONS "${PLATFORM_COMPILE_OPTIONS};/Zc:preprocessor")
|
|
|
|
|
+ endif()
|
|
|
|
|
+ else()
|
|
|
|
|
set(OCIO_USE_SSE2NEON OFF)
|
|
|
|
|
endif()
|
|
|
|
|
endif()
|
|
|
|
|
diff --git a/src/OpenColorIO/CPUInfo.cpp b/src/OpenColorIO/CPUInfo.cpp
|
|
|
|
|
index dce813a4f..edf341792 100644
|
|
|
|
|
--- a/src/OpenColorIO/CPUInfo.cpp
|
|
|
|
|
+++ b/src/OpenColorIO/CPUInfo.cpp
|
|
|
|
|
@@ -183,7 +183,7 @@ CPUInfo::CPUInfo()
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
-#elif defined(__aarch64__) // ARM Processor or Apple ARM.
|
|
|
|
|
+#elif defined(__aarch64__) || defined(_M_ARM64) // ARM 64-bit processor (multiple platforms)
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
CPUInfo::CPUInfo()
|
|
|
|
|
{
|
|
|
|
|
diff --git a/src/OpenColorIO/CPUInfoConfig.h.in b/src/OpenColorIO/CPUInfoConfig.h.in
|
|
|
|
|
index b8f5045d2..c105d4159 100644
|
|
|
|
|
--- a/src/OpenColorIO/CPUInfoConfig.h.in
|
|
|
|
|
+++ b/src/OpenColorIO/CPUInfoConfig.h.in
|
|
|
|
|
@@ -6,7 +6,7 @@
|
|
|
|
|
#cmakedefine01 OCIO_ARCH_X86_32
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
// Relevant only for arm64 architecture.
|
|
|
|
|
-#if defined(__aarch64__)
|
|
|
|
|
+#if defined(__aarch64__) || defined(_M_ARM64)
|
|
|
|
|
#cmakedefine01 OCIO_USE_SSE2NEON
|
|
|
|
|
#else
|
|
|
|
|
#define OCIO_USE_SSE2NEON 0
|
|
|
|
|
@@ -23,7 +23,7 @@
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
// Building for x86_64 processor on a non-ARM host architecture
|
|
|
|
|
// OR Building on/for an ARM architecture and using SSE2NEON.
|
|
|
|
|
-#if (OCIO_ARCH_X86 && !defined(__aarch64__)) || (defined(__aarch64__) && OCIO_USE_SSE2NEON)
|
|
|
|
|
+#if (OCIO_ARCH_X86 && !defined(__aarch64__)) || ((defined(__aarch64__) || defined(_M_ARM64)) && OCIO_USE_SSE2NEON)
|
|
|
|
|
#cmakedefine01 OCIO_USE_SSE2
|
|
|
|
|
#cmakedefine01 OCIO_USE_SSE3
|
|
|
|
|
#cmakedefine01 OCIO_USE_SSSE3
|
|
|
|
|
diff --git a/src/OpenColorIO/SSE.h b/src/OpenColorIO/SSE.h
|
|
|
|
|
index 2494698c5..6aebc45d4 100644
|
|
|
|
|
--- a/src/OpenColorIO/SSE.h
|
|
|
|
|
+++ b/src/OpenColorIO/SSE.h
|
|
|
|
|
@@ -9,14 +9,25 @@
|
|
|
|
|
#if OCIO_USE_SSE2
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
// Include the appropriate SIMD intrinsics header based on the architecture (Intel vs. ARM).
|
|
|
|
|
-#if !defined(__aarch64__)
|
|
|
|
|
+#if !defined(__aarch64__) && !defined(_M_ARM64)
|
|
|
|
|
#if OCIO_USE_SSE2
|
|
|
|
|
#include <emmintrin.h>
|
|
|
|
|
#endif
|
|
|
|
|
-#elif defined(__aarch64__)
|
|
|
|
|
+#elif defined(__aarch64__) || defined(_M_ARM64)
|
|
|
|
|
// ARM architecture A64 (ARM64)
|
|
|
|
|
#if OCIO_USE_SSE2NEON
|
|
|
|
|
+ // MSVC rejects the redefinitions below, so sse2neon's versions are renamed to *_orig while it is included
|
|
|
|
|
+ #if defined(_M_ARM64)
|
|
|
|
|
+ #define _mm_max_ps _mm_max_ps_orig
|
|
|
|
|
+ #define _mm_min_ps _mm_min_ps_orig
|
|
|
|
|
+ #endif
|
|
|
|
|
+
|
|
|
|
|
#include <sse2neon.h>
|
|
|
|
|
+
|
|
|
|
|
+ #if defined(_M_ARM64)
|
|
|
|
|
+ #undef _mm_max_ps
|
|
|
|
|
+ #undef _mm_min_ps
|
|
|
|
|
+ #endif
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
@@ -30,7 +41,7 @@ namespace OCIO_NAMESPACE
|
|
|
|
|
// Note that it is important for the code below this ifdef stays in the OCIO_NAMESPACE since
|
|
|
|
|
// it is redefining two of the functions from sse2neon.
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
-#if defined(__aarch64__)
|
|
|
|
|
+#if defined(__aarch64__) || defined(_M_ARM64)
|
|
|
|
|
#if OCIO_USE_SSE2NEON
|
2025-05-29 14:41:20 -04:00
|
|
|
// Using vmaxnmq_f32 and vminnmq_f32 rather than sse2neon's vmaxq_f32 and vminq_f32 due to
|
2025-02-14 11:48:37 +00:00
|
|
|
// NaN handling. This doesn't seem to be significantly slower than the default sse2neon behavior.
|
|
|
|
|
@@ -77,6 +88,9 @@ static const __m128 EPOS128 = _mm_set1_ps(128.0f);
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
static const __m128 EPOSINF = _mm_set1_ps(std::numeric_limits<float>::infinity());
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
+// These functions won't work when using MSVC + ARM64 unless you specify /Zc:arm64-aliased-neon-types-
|
2025-05-29 14:41:20 -04:00
|
|
|
+// This comes with its own issues, so it is easier to just disable them when using MSVC + ARM64
|
2025-02-14 11:48:37 +00:00
|
|
|
+#if !defined(_M_ARM64)
|
|
|
|
|
// Debug function to print out the contents of a floating-point SSE register
|
|
|
|
|
inline void ssePrintRegister(const char* msg, __m128& reg)
|
|
|
|
|
{
|
|
|
|
|
@@ -91,6 +105,7 @@ inline void ssePrintRegister(const char* msg, __m128i& reg)
|
|
|
|
|
int *r = (int*) &reg;
|
|
|
|
|
printf("%s : %d %d %d %d\n", msg, r[0], r[1], r[2], r[3]);
|
|
|
|
|
}
|
|
|
|
|
+#endif
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
// Determine whether a floating-point value is negative based on its sign bit.
|
|
|
|
|
// This function will treat special values, like -0, -NaN, -Inf, as they were indeed
|
|
|
|
|
@@ -170,7 +185,7 @@ inline __m128 sseLog2(__m128 x)
|
|
|
|
|
{
|
2025-05-29 14:41:20 -04:00
|
|
|
// y = log2( x ) = log2( 2^exponent * mantissa )
|
2025-02-14 11:48:37 +00:00
|
|
|
// = exponent + log2( mantissa )
|
|
|
|
|
-
|
2025-05-29 14:41:20 -04:00
|
|
|
+
|
2025-02-14 11:48:37 +00:00
|
|
|
__m128 mantissa
|
|
|
|
|
= _mm_or_ps( // OR with EONE
|
|
|
|
|
_mm_andnot_ps( // NOT(EMASK) AND x
|
|
|
|
|
diff --git a/src/OpenColorIO/SSE2.h b/src/OpenColorIO/SSE2.h
|
|
|
|
|
index 918694fc8..e51dad9b5 100644
|
|
|
|
|
--- a/src/OpenColorIO/SSE2.h
|
|
|
|
|
+++ b/src/OpenColorIO/SSE2.h
|
|
|
|
|
@@ -9,12 +9,35 @@
|
|
|
|
|
#if OCIO_USE_SSE2
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
// Include the appropriate SIMD intrinsics header based on the architecture (Intel vs. ARM).
|
|
|
|
|
-#if !defined(__aarch64__)
|
|
|
|
|
+#if !defined(__aarch64__) && !defined(_M_ARM64)
|
|
|
|
|
#include <emmintrin.h>
|
|
|
|
|
-#elif defined(__aarch64__)
|
|
|
|
|
+#elif defined(__aarch64__) || defined(_M_ARM64)
|
|
|
|
|
// ARM architecture A64 (ARM64)
|
|
|
|
|
#if OCIO_USE_SSE2NEON
|
|
|
|
|
+ // MSVC rejects the redefinitions below, so sse2neon's versions are renamed to *_orig while it is included
|
|
|
|
|
+ #if defined(_M_ARM64)
|
|
|
|
|
+ #define _mm_max_ps _mm_max_ps_orig
|
|
|
|
|
+ #define _mm_min_ps _mm_min_ps_orig
|
|
|
|
|
+ #endif
|
|
|
|
|
+
|
|
|
|
|
#include <sse2neon.h>
|
|
|
|
|
+
|
|
|
|
|
+ #if defined(_M_ARM64)
|
|
|
|
|
+ #undef _mm_max_ps
|
|
|
|
|
+ #undef _mm_min_ps
|
|
|
|
|
+ #endif
|
|
|
|
|
+
|
|
|
|
|
+ // Current versions of MSVC do not define float16_t, so we do it ourselves using
|
|
|
|
|
+ // int16_t as an intermediate type
|
|
|
|
|
+ #if defined(_M_ARM64) && !defined(float16_t)
|
|
|
|
|
+ #define float16_t int16_t
|
|
|
|
|
+ #endif
|
|
|
|
|
+
|
|
|
|
|
+ // Current versions of MSVC do not define vst1q_f16, so we do it ourselves using
|
|
|
|
|
+ // internal methods from MSVC's arm_neon.h
|
|
|
|
|
+ #if defined(_M_ARM64) && !defined(vst1q_f16)
|
|
|
|
|
+ #define vst1q_f16(A, B) neon_st1m_q16((A), __float16x8_t_to_n128(B)) // No trailing ';': the caller supplies it.
|
|
|
|
|
+ #endif
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
@@ -30,7 +53,7 @@ namespace OCIO_NAMESPACE
|
|
|
|
|
// Note that it is important for the code below this ifdef stays in the OCIO_NAMESPACE since
|
|
|
|
|
// it is redefining two of the functions from sse2neon.
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
-#if defined(__aarch64__)
|
|
|
|
|
+#if defined(__aarch64__) || defined(_M_ARM64)
|
|
|
|
|
#if OCIO_USE_SSE2NEON
|
2025-05-29 14:41:20 -04:00
|
|
|
// Using vmaxnmq_f32 and vminnmq_f32 rather than sse2neon's vmaxq_f32 and vminq_f32 due to
|
2025-02-14 11:48:37 +00:00
|
|
|
// NaN handling. This doesn't seem to be significantly slower than the default sse2neon behavior.
|
|
|
|
|
@@ -321,7 +344,7 @@ struct SSE2RGBAPack<BIT_DEPTH_F16>
|
|
|
|
|
sse2RGBATranspose_4x4(r, g, b, a, rgba0, rgba1, rgba2, rgba3);
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
#if OCIO_USE_SSE2NEON
|
|
|
|
|
- // use neon hardware support for f32 to f16
|
|
|
|
|
+ // use neon hardware support for f32 to f16 (except with MSVC, which doesn't support it)
|
|
|
|
|
float16x8_t rgba;
|
|
|
|
|
float16x4_t rgba00_01 = vcvt_f16_f32(vreinterpretq_f32_m128(rgba0));
|
|
|
|
|
float16x4_t rgba03_03 = vcvt_f16_f32(vreinterpretq_f32_m128(rgba1));
|
|
|
|
|
diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
|
|
|
|
|
index 96adff44b..e9e977433 100644
|
|
|
|
|
--- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
|
|
|
|
|
+++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOpCPU.cpp
|
|
|
|
|
@@ -1844,7 +1844,7 @@ __m128 Renderer_LIN_TO_PQ_SSE<true>::myPower(__m128 x, __m128 exp)
|
|
|
|
|
return ssePower(x, exp);
|
|
|
|
|
}
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
-#ifdef _WIN32
|
|
|
|
|
+#if (_MSC_VER >= 1920) && (OCIO_USE_AVX)
|
|
|
|
|
// Only Windows compilers have built-in _mm_pow_ps() SVML intrinsic
|
|
|
|
|
// implementation, so non-fast SIMD version is available only on Windows for
|
|
|
|
|
// now.
|
|
|
|
|
@@ -1853,7 +1853,7 @@ __m128 Renderer_LIN_TO_PQ_SSE<false>::myPower(__m128 x, __m128 exp)
|
|
|
|
|
{
|
|
|
|
|
return _mm_pow_ps(x, exp);
|
|
|
|
|
}
|
|
|
|
|
-#endif // _WIN32
|
|
|
|
|
+#endif // (_MSC_VER >= 1920) && (OCIO_USE_AVX)
|
2025-05-29 14:41:20 -04:00
|
|
|
|
2025-02-14 11:48:37 +00:00
|
|
|
template<bool FAST_POWER>
|
|
|
|
|
void Renderer_LIN_TO_PQ_SSE<FAST_POWER>::apply(const void* inImg, void* outImg, long numPixels) const
|