Windows: Enable OpenPGL on ARM64 machines
This is required to reach feature completeness with x64 platforms. Also patches TBB headers in order to fix a few build errors when compiling OpenPGL. Pull Request: https://projects.blender.org/blender/blender/pulls/127705
This commit is contained in:
@@ -100,7 +100,9 @@ if(NOT APPLE)
|
||||
endif()
|
||||
endif()
|
||||
include(cmake/ispc.cmake)
|
||||
if(NOT BLENDER_PLATFORM_WINDOWS_ARM)
|
||||
if(BLENDER_PLATFORM_WINDOWS_ARM)
|
||||
include(cmake/openpgl_windows_arm.cmake)
|
||||
else()
|
||||
include(cmake/openpgl.cmake)
|
||||
endif()
|
||||
# Embree needs to be included after dpcpp as it uses it for compiling with GPU support
|
||||
|
||||
120
build_files/build_environment/cmake/openpgl_windows_arm.cmake
Normal file
120
build_files/build_environment/cmake/openpgl_windows_arm.cmake
Normal file
@@ -0,0 +1,120 @@
|
||||
# SPDX-FileCopyrightText: 2022-2024 Blender Authors
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
# Note the utility apps may use png/tiff/gif system libraries, but the
|
||||
# library itself does not depend on them, so should give no problems.
|
||||
|
||||
set(OPENPGL_LLVM_INSTALL_PATH ${LIBDIR}/llvm)
|
||||
|
||||
set(OPENPGL_EXTRA_ARGS
|
||||
-DOPENPGL_BUILD_STATIC=ON
|
||||
-DOPENPGL_TBB_ROOT=${LIBDIR}/tbb
|
||||
-DTBB_ROOT=${LIBDIR}/tbb
|
||||
-DCMAKE_DEBUG_POSTFIX=_d
|
||||
-DCMAKE_CXX_COMPILER=${OPENPGL_LLVM_INSTALL_PATH}/bin/clang-cl.exe
|
||||
-DCMAKE_C_COMPILER=${OPENPGL_LLVM_INSTALL_PATH}/bin/clang-cl.exe
|
||||
-DCMAKE_C_FLAGS_INIT="--target=arm64-pc-windows-msvc"
|
||||
-DCMAKE_CXX_FLAGS_INIT="--target=arm64-pc-windows-msvc"
|
||||
-DCMAKE_SHARED_LINKER_FLAGS=-L"${LIBDIR}/../../VS1564R/Release/llvm/lib"
|
||||
)
|
||||
|
||||
if(TBB_STATIC_LIBRARY)
|
||||
set(OPENPGL_EXTRA_ARGS
|
||||
${OPENPGL_EXTRA_ARGS}
|
||||
-DOPENPGL_TBB_COMPONENT=tbb_static
|
||||
)
|
||||
endif()
|
||||
|
||||
# We want the VS2019 tools for OpenPGL, as they are stable.
|
||||
# We cannot use VS2022 easily, unless we specify an older (unsupported) toolset such as 17.35,
|
||||
# as the newer toolsets mandate newer versions of LLVM, which we cannot use currently,
|
||||
# due to lack of support in OSL and ISPC.
|
||||
set(OPENPGL_VCTOOLS_REQUIRED_VERSION 14.29)
|
||||
|
||||
# Extract the list of installed tools that match the required version from the
|
||||
# `VCToolsInstallDir` env var
|
||||
file(TO_CMAKE_PATH $ENV{VCToolsInstallDir} OPENPGL_VCTOOLSINSTALLDIR_PATH)
|
||||
cmake_path(GET OPENPGL_VCTOOLSINSTALLDIR_PATH PARENT_PATH OPENPGL_VCTOOLSDIR_PATH)
|
||||
file(GLOB OPENPGL_INSTALLED_VCTOOLS RELATIVE ${OPENPGL_VCTOOLSDIR_PATH} ${OPENPGL_VCTOOLSDIR_PATH}/${OPENPGL_VCTOOLS_REQUIRED_VERSION}*)
|
||||
|
||||
# Check that at least one the installed tool versions
|
||||
# (there may be different subversions) is present.
|
||||
if(NOT OPENPGL_INSTALLED_VCTOOLS)
|
||||
message(FATAL_ERROR "When building for Windows ARM64 platforms, OpenPGL requires VC Tools ${OPENPGL_VCTOOLS_REQUIRED_VERSION} to be installed alongside the current version.")
|
||||
endif()
|
||||
|
||||
# Get the last item in the list (latest, when list is sorted)
|
||||
list(SORT OPENPGL_INSTALLED_VCTOOLS)
|
||||
list(GET OPENPGL_INSTALLED_VCTOOLS -1 OPENPGL_VCTOOLS_VERSION)
|
||||
|
||||
# Configure our in file and temporarily store it in the build dir
|
||||
# (with modified extension so nothing else picks it up)
|
||||
# This feels icky, but boost does something similar, and we haven't called
|
||||
# `ExternalProject_Add` yet, so the OpenPGL dir does not yet exist.
|
||||
configure_file(
|
||||
${PATCH_DIR}/openpgl_Directory.Build.Props.in
|
||||
${BUILD_DIR}/openpgl_Directory.Build.Props_temp
|
||||
)
|
||||
|
||||
# Set the patch command to copy the configured build props file in,
|
||||
# and also a newer version of sse2neon
|
||||
set(OPENPGL_PATCH_COMMAND
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${BUILD_DIR}/openpgl_Directory.Build.Props_temp
|
||||
${BUILD_DIR}/openpgl/src/external_openpgl-build/Directory.Build.Props
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${BUILD_DIR}/sse2neon/src/external_sse2neon/sse2neon.h
|
||||
${BUILD_DIR}/openpgl/src/external_openpgl/third-party/embreeSrc/common/simd/arm/sse2neon.h &&
|
||||
${PATCH_CMD} -p 1 -d ${BUILD_DIR}/openpgl/src/external_openpgl < ${PATCH_DIR}/openpgl_windows_arm.diff
|
||||
)
|
||||
|
||||
# This all only works if we use the VS generator (with `clangcl` toolset), so ensure we use that
|
||||
# Note: there is literally no way to get ninja to use a different toolset other than manually
|
||||
# overwriting every env var, or calling a nested `vcvarsall`, both of which are *messy*.
|
||||
set(OPENPGL_GENERATOR ${CMAKE_GENERATOR})
|
||||
set(OPENPGL_GENERATOR_TOOLSET ClangCL)
|
||||
|
||||
ExternalProject_Add(external_openpgl
|
||||
URL file://${PACKAGE_DIR}/${OPENPGL_FILE}
|
||||
DOWNLOAD_DIR ${DOWNLOAD_DIR}
|
||||
URL_HASH ${OPENPGL_HASH_TYPE}=${OPENPGL_HASH}
|
||||
CMAKE_GENERATOR ${OPENPGL_GENERATOR}
|
||||
CMAKE_GENERATOR_TOOLSET ${OPENPGL_GENERATOR_TOOLSET}
|
||||
PREFIX ${BUILD_DIR}/openpgl
|
||||
PATCH_COMMAND ${OPENPGL_PATCH_COMMAND}
|
||||
|
||||
CMAKE_ARGS
|
||||
-DCMAKE_INSTALL_PREFIX=${LIBDIR}/openpgl -DCMAKE_BUILD_TYPE=${BUILD_MODE}
|
||||
${DEFAULT_CMAKE_FLAGS}
|
||||
${OPENPGL_EXTRA_ARGS}
|
||||
|
||||
INSTALL_DIR ${LIBDIR}/openpgl
|
||||
)
|
||||
|
||||
add_dependencies(
|
||||
external_openpgl
|
||||
external_tbb
|
||||
external_sse2neon
|
||||
)
|
||||
|
||||
if(BUILD_MODE STREQUAL Release)
|
||||
ExternalProject_Add_Step(external_openpgl after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory
|
||||
${LIBDIR}/openpgl
|
||||
${HARVEST_TARGET}/openpgl
|
||||
|
||||
DEPENDEES install
|
||||
)
|
||||
else()
|
||||
ExternalProject_Add_Step(external_openpgl after_install
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${LIBDIR}/openpgl/lib/openpgl_d.lib
|
||||
${HARVEST_TARGET}/openpgl/lib/openpgl_d.lib
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${LIBDIR}/openpgl/lib/cmake/openpgl-${OPENPGL_SHORT_VERSION}/openpgl_Exports-debug.cmake
|
||||
${HARVEST_TARGET}/openpgl/lib/cmake/openpgl-${OPENPGL_SHORT_VERSION}/openpgl_Exports-debug.cmake
|
||||
|
||||
DEPENDEES install
|
||||
)
|
||||
endif()
|
||||
@@ -0,0 +1,7 @@
|
||||
<Project>
|
||||
<PropertyGroup>
|
||||
<LLVMInstallDir>${OPENPGL_LLVM_INSTALL_PATH}</LLVMInstallDir>
|
||||
<LLVMToolsVersion>${LLVM_VERSION}</LLVMToolsVersion>
|
||||
<VCToolsVersion>${OPENPGL_VCTOOLS_VERSION}</VCToolsVersion>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
124
build_files/build_environment/patches/openpgl_windows_arm.diff
Normal file
124
build_files/build_environment/patches/openpgl_windows_arm.diff
Normal file
@@ -0,0 +1,124 @@
|
||||
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
||||
index ec486ee..cc77876 100644
|
||||
--- a/CMakeLists.txt
|
||||
+++ b/CMakeLists.txt
|
||||
@@ -35,7 +35,7 @@ SET(OPENPGL_ARM OFF)
|
||||
IF (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_OSX_ARCHITECTURES MATCHES "arm64"))
|
||||
MESSAGE(STATUS "Building for Apple silicon")
|
||||
SET(OPENPGL_ARM ON)
|
||||
-ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
|
||||
+ELSEIF(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")
|
||||
MESSAGE(STATUS "Building for AArch64")
|
||||
SET(OPENPGL_ARM ON)
|
||||
ENDIF()
|
||||
diff --git a/openpgl/CMakeLists.txt b/openpgl/CMakeLists.txt
|
||||
index 4bcccbe..1199f22 100644
|
||||
--- a/openpgl/CMakeLists.txt
|
||||
+++ b/openpgl/CMakeLists.txt
|
||||
@@ -60,7 +60,8 @@ message(STATUS "Compiler: ${CMAKE_CXX_COMPILER_ID}")
|
||||
message(STATUS "Arch: ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
|
||||
if(WIN32)
|
||||
- if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
||||
+ # Here we chack for MSVC, or Clang pretending to be MSVC via Clang-CL
|
||||
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC"))
|
||||
set(OPENPGL_RELEASE_OPTIONS /Ox /Oi)
|
||||
set(OPENPGL_COMMON_OPTIONS /fp:precise)
|
||||
#set(OPENPGL_RELEASE_OPTIONS ${OPENPGL_RELEASE_OPTIONS} -ftree-vectorize -mfpmath=sse -funsafe-math-optimizations -fno-rounding-math -fno-signaling-nans -fno-math-errno -fomit-frame-pointer )
|
||||
@@ -77,6 +78,14 @@ if(WIN32)
|
||||
if(OPENPGL_ISA_AVX512)
|
||||
set_source_files_properties(api/deviceCPU16.cpp PROPERTIES COMPILE_FLAGS "/D__SSE__ /D__SSE2__ /D__SSE3__ /D__SSE4_1__ /D__SSE4_2__ /arch:AVX /arch:AVX2 /arch:AVX512")
|
||||
endif()
|
||||
+ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
|
||||
+ if(OPENPGL_ISA_NEON)
|
||||
+ set_source_files_properties(api/deviceCPU4.cpp PROPERTIES COMPILE_FLAGS "/D__SSE4_2__ /D__SSE4_1__")
|
||||
+ endif()
|
||||
+ if(OPENPGL_ISA_NEON2X)
|
||||
+ set_source_files_properties(api/deviceCPU8.cpp PROPERTIES COMPILE_FLAGS "/D__AVX2__ /D__AVX__ /D__SSE4_2__ /D__SSE4_1__ /D__BMI__ /D__BMI2__ /D__LZCNT__")
|
||||
+ endif()
|
||||
+ endif()
|
||||
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM" OR CMAKE_CXX_COMPILER_ID STREQUAL "dpcpp")
|
||||
set(OPENPGL_RELEASE_OPTIONS -O3)
|
||||
set(OPENPGL_COMMON_OPTIONS -Wall)
|
||||
diff --git a/third-party/embreeSrc/common/simd/arm/sse2neon.h b/third-party/embreeSrc/common/simd/arm/sse2neon.h
|
||||
index 99831e3..a2cefbc 100644
|
||||
--- a/third-party/embreeSrc/common/simd/arm/sse2neon.h
|
||||
+++ b/third-party/embreeSrc/common/simd/arm/sse2neon.h
|
||||
@@ -336,6 +336,14 @@ FORCE_INLINE void _sse2neon_smp_mb(void)
|
||||
* argument "a" of mm_shuffle_ps that will be places in fp1 of result.
|
||||
* fp0 is the same for fp0 of result.
|
||||
*/
|
||||
+#if defined(__aarch64__)
|
||||
+#define _MN_SHUFFLE(fp3,fp2,fp1,fp0) ( (uint8x16_t){ (((fp3)*4)+0), (((fp3)*4)+1), (((fp3)*4)+2), (((fp3)*4)+3), (((fp2)*4)+0), (((fp2)*4)+1), (((fp2)*4)+\
|
||||
+2), (((fp2)*4)+3), (((fp1)*4)+0), (((fp1)*4)+1), (((fp1)*4)+2), (((fp1)*4)+3), (((fp0)*4)+0), (((fp0)*4)+1), (((fp0)*4)+2), (((fp0)*4)+3) } )
|
||||
+#define _MF_SHUFFLE(fp3,fp2,fp1,fp0) ( (uint8x16_t){ (((fp3)*4)+0), (((fp3)*4)+1), (((fp3)*4)+2), (((fp3)*4)+3), (((fp2)*4)+0), (((fp2)*4)+1), (((fp2)*4)+\
|
||||
+2), (((fp2)*4)+3), (((fp1)*4)+16+0), (((fp1)*4)+16+1), (((fp1)*4)+16+2), (((fp1)*4)+16+3), (((fp0)*4)+16+0), (((fp0)*4)+16+1), (((fp0)*4)+16+2), (((fp0)*\
|
||||
+4)+16+3) } )
|
||||
+#endif
|
||||
+
|
||||
#define _MM_SHUFFLE(fp3, fp2, fp1, fp0) \
|
||||
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
|
||||
|
||||
@@ -2822,7 +2830,7 @@ FORCE_INLINE void _mm_stream_pi(__m64 *p, __m64 a)
|
||||
FORCE_INLINE void _mm_stream_ps(float *p, __m128 a)
|
||||
{
|
||||
#if __has_builtin(__builtin_nontemporal_store)
|
||||
- __builtin_nontemporal_store(a, (float32x4_t *) p);
|
||||
+ __builtin_nontemporal_store(reinterpret_cast<float32x4_t>(a), (float32x4_t *) p);
|
||||
#else
|
||||
vst1q_f32(p, vreinterpretq_f32_m128(a));
|
||||
#endif
|
||||
@@ -5660,7 +5668,7 @@ FORCE_INLINE void _mm_storeu_si32(void *p, __m128i a)
|
||||
FORCE_INLINE void _mm_stream_pd(double *p, __m128d a)
|
||||
{
|
||||
#if __has_builtin(__builtin_nontemporal_store)
|
||||
- __builtin_nontemporal_store(a, (__m128d *) p);
|
||||
+ __builtin_nontemporal_store(reinterpret_cast<float32x4_t>(a), (float32x4_t *) p);
|
||||
#elif defined(__aarch64__) || defined(_M_ARM64)
|
||||
vst1q_f64(p, vreinterpretq_f64_m128d(a));
|
||||
#else
|
||||
@@ -6809,14 +6817,14 @@ FORCE_INLINE __m64 _mm_sign_pi8(__m64 _a, __m64 _b)
|
||||
_sse2neon_define2( \
|
||||
__m128i, a, b, \
|
||||
const uint16_t _mask[8] = \
|
||||
- _sse2neon_init(((imm) & (1 << 0)) ? (uint16_t) -1 : 0x0, \
|
||||
- ((imm) & (1 << 1)) ? (uint16_t) -1 : 0x0, \
|
||||
- ((imm) & (1 << 2)) ? (uint16_t) -1 : 0x0, \
|
||||
- ((imm) & (1 << 3)) ? (uint16_t) -1 : 0x0, \
|
||||
- ((imm) & (1 << 4)) ? (uint16_t) -1 : 0x0, \
|
||||
- ((imm) & (1 << 5)) ? (uint16_t) -1 : 0x0, \
|
||||
- ((imm) & (1 << 6)) ? (uint16_t) -1 : 0x0, \
|
||||
- ((imm) & (1 << 7)) ? (uint16_t) -1 : 0x0); \
|
||||
+ _sse2neon_init(((imm) & (1 << 0)) ? (uint16_t)0xffff : (uint16_t)0x0000, \
|
||||
+ ((imm) & (1 << 1)) ? (uint16_t)0xffff : (uint16_t)0x0000, \
|
||||
+ ((imm) & (1 << 2)) ? (uint16_t)0xffff : (uint16_t)0x0000, \
|
||||
+ ((imm) & (1 << 3)) ? (uint16_t)0xffff : (uint16_t)0x0000, \
|
||||
+ ((imm) & (1 << 4)) ? (uint16_t)0xffff : (uint16_t)0x0000, \
|
||||
+ ((imm) & (1 << 5)) ? (uint16_t)0xffff : (uint16_t)0x0000, \
|
||||
+ ((imm) & (1 << 6)) ? (uint16_t)0xffff : (uint16_t)0x0000, \
|
||||
+ ((imm) & (1 << 7)) ? (uint16_t)0xffff : (uint16_t)0x0000); \
|
||||
uint16x8_t _mask_vec = vld1q_u16(_mask); \
|
||||
uint16x8_t __a = vreinterpretq_u16_m128i(_a); \
|
||||
uint16x8_t __b = vreinterpretq_u16_m128i(_b); _sse2neon_return( \
|
||||
diff --git a/third-party/embreeSrc/common/sys/intrinsics.h b/third-party/embreeSrc/common/sys/intrinsics.h
|
||||
index f5074bb..b0511b5 100644
|
||||
--- a/third-party/embreeSrc/common/sys/intrinsics.h
|
||||
+++ b/third-party/embreeSrc/common/sys/intrinsics.h
|
||||
@@ -91,7 +91,7 @@ namespace embree
|
||||
|
||||
#if defined(__X86_64__) || defined (__aarch64__)
|
||||
__forceinline size_t bsf(size_t v) {
|
||||
-#if defined(__AVX2__)
|
||||
+#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return _tzcnt_u64(v);
|
||||
#else
|
||||
unsigned long r = 0; _BitScanForward64(&r,v); return r;
|
||||
@@ -140,7 +140,7 @@ namespace embree
|
||||
|
||||
#if defined(__X86_64__) || defined (__aarch64__)
|
||||
__forceinline size_t bsr(size_t v) {
|
||||
-#if defined(__AVX2__)
|
||||
+#if defined(__AVX2__) && !defined(__aarch64__)
|
||||
return 63 -_lzcnt_u64(v);
|
||||
#else
|
||||
unsigned long r = 0; _BitScanReverse64(&r, v); return r;
|
||||
@@ -171,6 +171,23 @@ index 00000000..13d56678
|
||||
+ _InterlockedAnd((long volatile *)operand, (long)addend);
|
||||
+}
|
||||
+
|
||||
diff --git a/include/tbb/tbb_config.h b/include/tbb/tbb_config.h
|
||||
index 7a8d06a0..80b4e3a6 100644
|
||||
--- a/include/tbb/tbb_config.h
|
||||
+++ b/include/tbb/tbb_config.h
|
||||
@@ -209,10 +209,10 @@
|
||||
#elif __clang__
|
||||
/** TODO: these options need to be rechecked **/
|
||||
#define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __has_feature(__cxx_variadic_templates__)
|
||||
- #define __TBB_CPP11_RVALUE_REF_PRESENT (__has_feature(__cxx_rvalue_references__) && (_LIBCPP_VERSION || __TBB_GLIBCXX_VERSION >= 40500))
|
||||
+ #define __TBB_CPP11_RVALUE_REF_PRESENT (__has_feature(__cxx_rvalue_references__) && (_LIBCPP_VERSION || __TBB_GLIBCXX_VERSION >= 40500 || _MSC_VER >= 1700))
|
||||
#define __TBB_IMPLICIT_MOVE_PRESENT __has_feature(cxx_implicit_moves)
|
||||
/** TODO: extend exception_ptr related conditions to cover libstdc++ **/
|
||||
- #define __TBB_EXCEPTION_PTR_PRESENT (__cplusplus >= 201103L && (_LIBCPP_VERSION || __TBB_GLIBCXX_VERSION >= 40600))
|
||||
+ #define __TBB_EXCEPTION_PTR_PRESENT (__cplusplus >= 201103L && (_LIBCPP_VERSION || __TBB_GLIBCXX_VERSION >= 40600 || _MSC_VER >= 1600))
|
||||
#define __TBB_STATIC_ASSERT_PRESENT __has_feature(__cxx_static_assert__)
|
||||
#if (__cplusplus >= 201103L && __has_include(<tuple>))
|
||||
#define __TBB_CPP11_TUPLE_PRESENT 1
|
||||
diff --git a/include/tbb/tbb_machine.h b/include/tbb/tbb_machine.h
|
||||
index 9752be58..ebb98ec2 100644
|
||||
--- a/include/tbb/tbb_machine.h
|
||||
|
||||
Reference in New Issue
Block a user