From bdb093f58debd3fa0047f4a4db553285fa268580 Mon Sep 17 00:00:00 2001 From: Xavier Hallade Date: Thu, 12 Dec 2024 15:58:55 +0100 Subject: [PATCH] Build: upgrade DPC++/Level-Zero to 6.0.0-rc1/1.19.2 releases including a backport of device image compression: https://github.com/intel/llvm/pull/15124 that can be enabled by adding `--offload-compress` compiler option. --- .../build_environment/cmake/dpcpp.cmake | 25 +- .../build_environment/cmake/dpcpp_deps.cmake | 6 + .../build_environment/cmake/versions.cmake | 33 +- .../build_environment/patches/dpcpp.diff | 258 ++-- .../patches/dpcpp_13328.diff | 12 - .../patches/dpcpp_15124.diff | 1036 +++++++++++++++++ .../build_environment/patches/level-zero.diff | 27 +- .../patches/unifiedmemoryframework.diff | 13 + .../patches/unifiedruntime.diff | 13 + .../cmake/platform/platform_unix.cmake | 2 +- .../cmake/platform/platform_win32.cmake | 36 +- 11 files changed, 1271 insertions(+), 190 deletions(-) delete mode 100644 build_files/build_environment/patches/dpcpp_13328.diff create mode 100644 build_files/build_environment/patches/dpcpp_15124.diff create mode 100644 build_files/build_environment/patches/unifiedmemoryframework.diff create mode 100644 build_files/build_environment/patches/unifiedruntime.diff diff --git a/build_files/build_environment/cmake/dpcpp.cmake b/build_files/build_environment/cmake/dpcpp.cmake index 5f30b46df37..a555c092d4d 100644 --- a/build_files/build_environment/cmake/dpcpp.cmake +++ b/build_files/build_environment/cmake/dpcpp.cmake @@ -46,6 +46,9 @@ set(DPCPP_EXTRA_ARGS -DSYCL_PI_UR_USE_FETCH_CONTENT=OFF -DSYCL_PI_UR_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/ -DFETCHCONTENT_SOURCE_DIR_UNIFIED-MEMORY-FRAMEWORK=${BUILD_DIR}/unifiedmemoryframework/src/external_unifiedmemoryframework/ + -DSYCL_UMF_DISABLE_HWLOC=ON + -DUMF_DISABLE_HWLOC=ON + -DUMF_BUILD_SHARED_LIBRARY=OFF # Below here is copied from an invocation of buildbot/config.py -DLLVM_ENABLE_ASSERTIONS=ON -DLLVM_TARGETS_TO_BUILD=X86 @@ -71,7 +74,7 @@ set(DPCPP_EXTRA_ARGS -DXPTI_ENABLE_WERROR=OFF -DSYCL_CLANG_EXTRA_FLAGS= -DSYCL_ENABLE_PLUGINS=level_zero - -DSYCL_ENABLE_KERNEL_FUSION=OFF + -DSYCL_ENABLE_EXTENSION_JIT=OFF -DSYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB=OFF -DCMAKE_INSTALL_RPATH=\$ORIGIN -DPython3_ROOT_DIR=${LIBDIR}/python/ @@ -79,10 +82,20 @@ set(DPCPP_EXTRA_ARGS -DPYTHON_EXECUTABLE=${PYTHON_BINARY} -DLLDB_ENABLE_CURSES=OFF -DLLVM_ENABLE_TERMINFO=OFF + -DLLVM_ENABLE_ZSTD=FORCE_ON + -DLLVM_USE_STATIC_ZSTD=ON + -Dzstd_INCLUDE_DIR=${LIBDIR}/zstd/include ) if(WIN32) - list(APPEND DPCPP_EXTRA_ARGS -DPython3_FIND_REGISTRY=NEVER) + list(APPEND DPCPP_EXTRA_ARGS + -DPython3_FIND_REGISTRY=NEVER + -Dzstd_LIBRARY=${LIBDIR}/zstd/lib/zstd_static.lib + ) +else() + list(APPEND DPCPP_EXTRA_ARGS + -Dzstd_LIBRARY=${LIBDIR}/zstd/lib/libzstd.a + ) endif() ExternalProject_Add(external_dpcpp @@ -111,7 +124,7 @@ ExternalProject_Add(external_dpcpp ${PATCH_DIR}/dpcpp.diff && ${PATCH_CMD} -p 1 -d ${BUILD_DIR}/dpcpp/src/external_dpcpp < - ${PATCH_DIR}/dpcpp_13328.diff + ${PATCH_DIR}/dpcpp_15124.diff INSTALL_DIR ${LIBDIR}/dpcpp ) @@ -142,9 +155,6 @@ if(WIN32) COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld.exe COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld-link.exe COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/wasm-ld.exe - COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/pi_unified_runtime.dll - COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ur_adapter_level_zero.dll - COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ur_loader.dll DEPENDEES install ) endif() @@ -152,7 +162,6 @@ else() harvest(external_dpcpp dpcpp/bin dpcpp/bin "*") harvest(external_dpcpp dpcpp/include dpcpp/include "*") harvest(external_dpcpp dpcpp/lib dpcpp/lib "libsycl*") - # avoid harvesting libpi_unified_runtime and libur_ as they're optional. - harvest(external_dpcpp dpcpp/lib dpcpp/lib "libpi_level_zero*") + harvest(external_dpcpp dpcpp/lib dpcpp/lib "libur*") harvest(external_dpcpp dpcpp/lib/clang dpcpp/lib/clang "*") endif() diff --git a/build_files/build_environment/cmake/dpcpp_deps.cmake b/build_files/build_environment/cmake/dpcpp_deps.cmake index 08134b8661f..b74c205da64 100644 --- a/build_files/build_environment/cmake/dpcpp_deps.cmake +++ b/build_files/build_environment/cmake/dpcpp_deps.cmake @@ -67,6 +67,9 @@ ExternalProject_Add(external_unifiedruntime URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH} DOWNLOAD_DIR ${DOWNLOAD_DIR} PREFIX ${BUILD_DIR}/unifiedruntime + PATCH_COMMAND ${PATCH_CMD} -p 1 -d + ${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime < + ${PATCH_DIR}/unifiedruntime.diff CONFIGURE_COMMAND echo . BUILD_COMMAND echo . INSTALL_COMMAND echo . @@ -77,6 +80,9 @@ ExternalProject_Add(external_unifiedmemoryframework URL_HASH ${UNIFIED_MEMORY_FRAMEWORK_HASH_TYPE}=${UNIFIED_MEMORY_FRAMEWORK_HASH} DOWNLOAD_DIR ${DOWNLOAD_DIR} PREFIX ${BUILD_DIR}/unifiedmemoryframework + PATCH_COMMAND ${PATCH_CMD} -p 1 -d + ${BUILD_DIR}/unifiedmemoryframework/src/external_unifiedmemoryframework < + ${PATCH_DIR}/unifiedmemoryframework.diff CONFIGURE_COMMAND echo . BUILD_COMMAND echo . INSTALL_COMMAND echo . diff --git a/build_files/build_environment/cmake/versions.cmake b/build_files/build_environment/cmake/versions.cmake index a68f1244cbb..5358f63d2bf 100644 --- a/build_files/build_environment/cmake/versions.cmake +++ b/build_files/build_environment/cmake/versions.cmake @@ -1039,25 +1039,26 @@ set(OPENPGL_HOMEPAGE http://www.openpgl.org/) set(OPENPGL_LICENSE SPDX:Apache-2.0) set(OPENPGL_COPYRIGHT "Copyright 2020 Intel Corporation.") -set(LEVEL_ZERO_VERSION 1.16.1) +# Default version used by DPCPP: unified-runtime/cmake/FetchLevelZero.cmake +set(LEVEL_ZERO_VERSION 1.19.2) set(LEVEL_ZERO_NAME "oneAPI Level Zero") set(LEVEL_ZERO_URI https://codeload.github.com/oneapi-src/level-zero/tar.gz/refs/tags/v${LEVEL_ZERO_VERSION}) -set(LEVEL_ZERO_HASH f341dd6355d8da6ee9c29031642b8e8e4259f91c13c72d318c81663af048817e) +set(LEVEL_ZERO_HASH b0bea0a09d1a68f68ecf8694e58a60e199fa5785f91c2fd59f026133bc1c4f28) set(LEVEL_ZERO_HASH_TYPE SHA256) set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz) set(LEVEL_ZERO_HOMEPAGE https://github.com/oneapi-src/level-zero) set(LEVEL_ZERO_LICENSE SPDX:MIT) -set(LEVEL_ZERO_COPYRIGHT "Copyright (C) 2019-2021 Intel Corporation") +set(LEVEL_ZERO_COPYRIGHT "Copyright (C) 2019-2024 Intel Corporation") -set(DPCPP_VERSION d2817d6d317db1143bb227168e85c409d5ab7c82) # tip of sycl-rel_5_2_0 as of 2024.05.24 +set(DPCPP_VERSION v6.0.0-rc1) set(DPCPP_URI https://github.com/intel/llvm/archive/${DPCPP_VERSION}.tar.gz) -set(DPCPP_HASH 86cbff157b79e29a6ebb96ba79c96f64b4296c33fcd896f60a5579955fca5724) +set(DPCPP_HASH 65f508827f48d9a894cc8c6fbdc9b15760681558ee29f6ebfef608034db99ca1) set(DPCPP_HASH_TYPE SHA256) set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz) set(DPCPP_NAME DPC++) set(DPCPP_HOMEPAGE "https://github.com/intel/llvm#oneapi-dpc-compiler") set(DPCPP_LICENSE SPDX:Apache-2.0) -set(DPCPP_COPYRIGHT "Copyright (C) 2021 Intel Corporation") +set(DPCPP_COPYRIGHT "Copyright (C) 2021-2024 Intel Corporation") ######################## ### DPCPP DEPS BEGIN ### @@ -1068,14 +1069,14 @@ set(DPCPP_COPYRIGHT "Copyright (C) 2021 Intel Corporation") # will take care of building them, unpack is being done in dpcpp_deps.cmake # Source llvm/lib/SYCLLowerIR/CMakeLists.txt -set(VCINTRINSICS_VERSION da892e1982b6c25b9a133f85b4ac97142d8a3def) +set(VCINTRINSICS_VERSION b2565a03eb3cac07f5e8000fde971f95dc782c75) set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz) -set(VCINTRINSICS_HASH 06b85bd988059939770eb6e6e6194562d17c5f5a5df9947af18696b3b1fe92f3) +set(VCINTRINSICS_HASH 4dfccbb60c2a929a97745c7a4cff04cc3f54aca1590b2763ca7842be59b55f01) set(VCINTRINSICS_HASH_TYPE SHA256) set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz) set(VCINTRINSICS_HOMEPAGE https://github.com/intel/vc-intrinsics) set(VCINTRINSICS_LICENSE SPDX:MIT) -set(VCINTRINSICS_COPYRIGHT "Copyright (c) 2019 Intel Corporation") +set(VCINTRINSICS_COPYRIGHT "Copyright (c) 2019-2024 Intel Corporation") # Source opencl/CMakeLists.txt set(OPENCLHEADERS_VERSION 9ddb236e6eb3cf844f9e2f81677e1045f9bf838e) @@ -1101,9 +1102,9 @@ set(ICDLOADER_COPYRIGHT " Copyright (c) 2020 The Khronos Group Inc.") # Source sycl/cmake/modules/AddBoostMp11Headers.cmake # Using external MP11 here, getting AddBoostMp11Headers.cmake to recognize # our copy in boost directly was more trouble than it was worth. -set(MP11_VERSION ef7608b463298b881bc82eae4f45a4385ed74fca) +set(MP11_VERSION 863d8b8d2b20f2acd0b5870f23e553df9ce90e6c) set(MP11_URI https://github.com/boostorg/mp11/archive/${MP11_VERSION}.tar.gz) -set(MP11_HASH ec2d68858dd4d04f9a1e3960fc94a58440715e1b3e746cc495438116715343e2) +set(MP11_HASH 525692267abb8086bb9cc2fe81fb96d73ac645dfa6825cb5114686aafe244e9f) set(MP11_HASH_TYPE SHA256) set(MP11_FILE mp11-${MP11_VERSION}.tar.gz) set(MP11_HOMEPAGE https://github.com/boostorg/mp11) @@ -1120,10 +1121,10 @@ set(SPIRV_HEADERS_HOMEPAGE https://github.com/KhronosGroup/SPIRV-Headers) set(SPIRV_HEADERS_LICENSE SPDX:MIT-Khronos-old) set(SPIRV_HEADERS_COPYRIGHT "Copyright (c) 2015-2024 The Khronos Group Inc.") -# Source sycl/plugins/unified_runtime/CMakeLists.txt -set(UNIFIED_RUNTIME_VERSION ec634ff05b067d7922ec45059dda94665e5dcd9b) +# Source sycl/cmake/modules/FetchUnifiedRuntime.cmake +set(UNIFIED_RUNTIME_VERSION 04db12683146673af9a09e923c19cf9a4ee96982) set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz) -set(UNIFIED_RUNTIME_HASH ff15574aba6225d0c8a32f71866126551dee1aaacfa7894b8fdcc5e52e0f5da9) +set(UNIFIED_RUNTIME_HASH 1ebb6f6ec640dac6279ad84a705ddb48da12e29af9942a7e8fc087f23212f650) set(UNIFIED_RUNTIME_HASH_TYPE SHA256) set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz) set(UNIFIED_RUNTIME_HOMEPAGE https://github.com/oneapi-src/unified-runtime) @@ -1131,9 +1132,9 @@ set(UNIFIED_RUNTIME_LICENSE SPDX:Apache-2.0 WITH LLVM-exception) set(UNIFIED_RUNTIME_COPYRIGHT "Copyright (C) 2019-2024 Intel Corporation") # Source unified-runtime/source/common/CMakeList.txt -set(UNIFIED_MEMORY_FRAMEWORK_VERSION 9bf7a0dc4dff76844e10edbb5c6e9d917536ef6d) +set(UNIFIED_MEMORY_FRAMEWORK_VERSION v0.9.0) set(UNIFIED_MEMORY_FRAMEWORK_URI https://github.com/oneapi-src/unified-memory-framework/archive/${UNIFIED_MEMORY_FRAMEWORK_VERSION}.tar.gz) -set(UNIFIED_MEMORY_FRAMEWORK_HASH 7ff7d0be7be6e59693d238eab02b5a9741c820d3d995446781dcd7a2adaa28e9) +set(UNIFIED_MEMORY_FRAMEWORK_HASH 8594738d84abb4001bb0e962383b8a2604837e7bbc378d0771ecdab436c7d001) set(UNIFIED_MEMORY_FRAMEWORK_HASH_TYPE SHA256) set(UNIFIED_MEMORY_FRAMEWORK_FILE unified-memory-framework-${UNIFIED_MEMORY_FRAMEWORK_VERSION}.tar.gz) set(UNIFIED_MEMORY_FRAMEWORK_HOMEPAGE https://github.com/oneapi-src/unified-memory-framework) diff --git a/build_files/build_environment/patches/dpcpp.diff b/build_files/build_environment/patches/dpcpp.diff index 570f1c25a83..7b311898b21 100644 --- a/build_files/build_environment/patches/dpcpp.diff +++ b/build_files/build_environment/patches/dpcpp.diff @@ -1,44 +1,46 @@ -diff -Naur llvm-sycl-nightly-20220501.orig\opencl/CMakeLists.txt llvm-sycl-nightly-20220501\opencl/CMakeLists.txt ---- llvm-sycl-nightly-20220501.orig/opencl/CMakeLists.txt 2022-04-29 13:47:11 -0600 -+++ llvm-sycl-nightly-20220501/opencl/CMakeLists.txt 2022-05-21 15:25:06 -0600 -@@ -11,6 +11,11 @@ - ) - endif() - -+# Blender code below is determined to use FetchContent_Declare -+# temporarily allow it (but feed it our downloaded tarball -+# in the OpenCL_HEADERS variable -+set(FETCHCONTENT_FULLY_DISCONNECTED OFF) -+ - # Repo URLs - - set(OCL_HEADERS_REPO -@@ -77,5 +82,6 @@ - - FetchContent_MakeAvailable(ocl-icd) - add_library(OpenCL-ICD ALIAS OpenCL) -+set(FETCHCONTENT_FULLY_DISCONNECTED ON) - - add_subdirectory(opencl-aot) -diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake ---- llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-02-08 09:17:24 -0700 -+++ llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-05-24 11:35:51 -0600 -@@ -36,7 +36,9 @@ - add_custom_target(libsycldevice-obj) +diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake +index c1aac6d017ef..99d84d89b9cc 100644 +--- a/libdevice/cmake/modules/SYCLLibdevice.cmake ++++ b/libdevice/cmake/modules/SYCLLibdevice.cmake +@@ -65,7 +65,9 @@ add_custom_target(libsycldevice-obj-new-offload) add_custom_target(libsycldevice-spv) + add_custom_target(libsycldevice-bc) -add_custom_target(libsycldevice DEPENDS +# Blender: add ALL here otherwise this target will not build +# and cause an error due to missing files during the install phase. +add_custom_target(libsycldevice ALL DEPENDS libsycldevice-obj - libsycldevice-spv) + libsycldevice-bc + libsycldevice-obj-new-offload +diff --git a/opencl/CMakeLists.txt b/opencl/CMakeLists.txt +index 6f618033a203..38a7d4ddec22 100644 +--- a/opencl/CMakeLists.txt ++++ b/opencl/CMakeLists.txt +@@ -11,6 +11,11 @@ if (MSVC) + ) + endif() ++# Blender code below is determined to use FetchContent_Declare ++# temporarily allow it (but feed it our downloaded tarball ++# in the OpenCL_HEADERS variable ++set(FETCHCONTENT_FULLY_DISCONNECTED OFF) ++ + # Repo URLs + + set(OCL_HEADERS_REPO +@@ -77,5 +82,6 @@ endif() + + FetchContent_MakeAvailable(ocl-icd) + add_library(OpenCL-ICD ALIAS OpenCL) ++set(FETCHCONTENT_FULLY_DISCONNECTED ON) + + add_subdirectory(opencl-aot) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt -index 00ce045f43c3..e044262e628e 100644 +index 6ee321b7bff9..a77b94ede51f 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt -@@ -188,7 +188,6 @@ install(FILES +@@ -203,7 +203,6 @@ install(FILES COMPONENT sycl-headers) include(AddBoostMp11Headers) @@ -48,10 +50,10 @@ index 00ce045f43c3..e044262e628e 100644 # are not detected by copy_directory command. diff --git a/sycl/cmake/modules/FetchBoostUnorderedHeaders.cmake b/sycl/cmake/modules/FetchBoostUnorderedHeaders.cmake deleted file mode 100644 -index a0f446055026..000000000000 +index 5cabc6cccdbf..000000000000 --- a/sycl/cmake/modules/FetchBoostUnorderedHeaders.cmake +++ /dev/null -@@ -1,129 +0,0 @@ +@@ -1,101 +0,0 @@ -# Fetches the unordered boost module and its dependencies -function(add_boost_module_headers) - cmake_parse_arguments( @@ -81,124 +83,126 @@ index a0f446055026..000000000000 - set(BOOST_UNORDERED_INCLUDE_DIRS ${BOOST_UNORDERED_INCLUDE_DIRS} "${BOOST_MODULE_SRC_DIR}/include" PARENT_SCOPE) -endfunction(add_boost_module_headers) - --set(BOOST_UNORDERED_GIT_TAG bd24dfd284dbc70e7521915af0d8d049f74a1e85) --# Author: joaquintides --# Date: Tue Jul 18 18:19:13 2023 +0200 +-set(BOOST_UNORDERED_GIT_TAG 5e6b9291deb55567d41416af1e77c2516dc1250f) +-# Merge: 15cfef69 ccf9a76e +-# Author: joaquintides +-# Date: Sat Mar 16 09:18:41 2024 +0100 -# --# updated concurrent map benchmark plots +-# Merge pull request #238 from boostorg/fix/gh-237 -add_boost_module_headers(NAME "unordered" SRC_DIR ${BOOST_UNORDERED_SOURCE_DIR} GIT_TAG ${BOOST_UNORDERED_GIT_TAG}) - --set(BOOST_ASSERT_GIT_TAG 02256c84fd0cd58a139d9dc1b25b5019ca976ada) +-set(BOOST_ASSERT_GIT_TAG 447e0b3a331930f8708ade0e42683d12de9dfbc3) -# Author: Peter Dimov --# Date: Thu Jun 22 18:11:58 2023 +0300 +-# Date: Sat Feb 3 20:43:55 2024 +0200 -# --# Do not use std::source_location::current under nvcc. Fixes #32. +-# Use __builtin_FUNCSIG() under MSVC 19.35+. Fixes #35. -add_boost_module_headers(NAME "assert" SRC_DIR ${BOOST_ASSERT_SOURCE_DIR} GIT_TAG ${BOOST_ASSERT_GIT_TAG}) - --set(BOOST_CONFIG_GIT_TAG a1cf5d531405e62927b0257b5cbecc66a545b508) --# Merge: f5726a26 a1edcd56 +-set(BOOST_CONFIG_GIT_TAG 11385ec21012926e15a612e3bf9f9a71403c1e5b) +-# Merge: eef05e98 601598f8 -# Author: jzmaddock --# Date: Sat Apr 15 13:20:12 2023 +0100 +-# Date: Sun Feb 4 09:46:22 2024 +0000 -# --# Merge pull request #475 from boostorg/ci_2023_04 +-# Merge branch 'develop' -add_boost_module_headers(NAME "config" SRC_DIR ${BOOST_CONFIG_SOURCE_DIR} GIT_TAG ${BOOST_CONFIG_GIT_TAG}) - --set(BOOST_CONTAINER_HASH_GIT_TAG 226eb066e949adbf37b220e993d64ecefeeaae99) +-set(BOOST_CONTAINER_HASH_GIT_TAG 6d214eb776456bf17fbee20780a034a23438084f) -# Author: Peter Dimov --# Date: Thu Jun 29 14:38:53 2023 +0300 +-# Date: Wed Mar 6 05:13:53 2024 +0200 -# --# Update .drone.jsonnet +-# Update .appveyor.yml -add_boost_module_headers(NAME "container_hash" SRC_DIR ${BOOST_CONTAINER_HASH_SOURCE_DIR} GIT_TAG ${BOOST_CONTAINER_HASH_GIT_TAG}) - --set(BOOST_CORE_GIT_TAG 216999e552e7f73e63c7bcc88b8ce9c179bbdbe2) --# Author: Peter Dimov --# Date: Sun Jun 25 13:46:53 2023 +0300 +-set(BOOST_CORE_GIT_TAG 083b41c17e34f1fc9b43ab796b40d0d8bece685c) +-# Merge: 8cc2fda a973490 +-# Author: Andrey Semashev +-# Date: Tue Mar 19 18:10:04 2024 +0300 -# --# Avoid -Wsign-conversion warning in checked_delete.hpp +-# Merge pull request #169 from k3DW/feature/168 -add_boost_module_headers(NAME "core" SRC_DIR ${BOOST_CORE_SOURCE_DIR} GIT_TAG ${BOOST_CORE_GIT_TAG}) - -# Describe is a dependency of container_hash --set(BOOST_DESCRIBE_GIT_TAG a0eafb08100eb15a57b6dae6d270c0012a56aa21) --# Merge: 1692c3e b54fda5 +-set(BOOST_DESCRIBE_GIT_TAG 50719b212349f3d1268285c586331584d3dbfeb5) -# Author: Peter Dimov --# Date: Sun May 21 04:51:35 2023 +0300 +-# Date: Sat Mar 23 20:27:08 2024 +0200 -# --# Merge branch 'fix-deprecated-inline-static-variables' of https://github.com/Romain-Geissler-1A/describe into feature/pr-40 +-# Update .drone.jsonnet -add_boost_module_headers(NAME "describe" SRC_DIR ${BOOST_DESCRIBE_SOURCE_DIR} GIT_TAG ${BOOST_DESCRIBE_GIT_TAG}) - --set(BOOST_MOVE_GIT_TAG f1fbb45134065deebe95249c616a967d4b66c809) --# Author: Ion GaztaƱaga --# Date: Mon Mar 13 13:32:29 2023 +0100 --# --# Use [[msvc::intrinsic] attribute if available in move/forward in order to improve debug experience --add_boost_module_headers(NAME "move" SRC_DIR ${BOOST_MOVE_SOURCE_DIR} GIT_TAG ${BOOST_MOVE_GIT_TAG}) -- -# Reuse mp11 fetched earlier for DPC++ headers -set(BOOST_UNORDERED_INCLUDE_DIRS ${BOOST_UNORDERED_INCLUDE_DIRS} "${BOOST_MP11_SOURCE_DIR}/include/") - --set(BOOST_PREDEF_GIT_TAG 392e4e767469e3469c9390f0d9cca16724dc3fc8) --# Merge: a12c7fd 499d28e +-set(BOOST_PREDEF_GIT_TAG 0fdfb49c3a6789e50169a44e88a07cc889001106) +-# Merge: 392e4e7 614546d -# Author: Rene Rivera --# Date: Sun Feb 27 14:44:35 2022 -0600 --# --# Release 1.14. --add_boost_module_headers(NAME "predef" SRC_DIR ${BOOST_PREDEF_SOURCE_DIR} GIT_TAG ${BOOST_PREDEF_GIT_TAG}) -- --set(BOOST_PREPROCESSOR_GIT_TAG 667e87b3392db338a919cbe0213979713aca52e3) --# Author: Peter Dimov --# Date: Tue Aug 16 20:59:52 2022 +0300 --# --# Change C test names to not conflict with the C++ ones --add_boost_module_headers(NAME "preprocessor" SRC_DIR ${BOOST_PREPROCESSOR_SOURCE_DIR} GIT_TAG ${BOOST_PREPROCESSOR_GIT_TAG}) -- --set(BOOST_STATIC_ASSERT_GIT_TAG 45eec41c293bc5cd36ec3ed83671f70bc1aadc9f) --# Merge: ba72d33 a1abfec --# Author: jzmaddock --# Date: Tue Mar 8 09:35:50 2022 +0000 --# --# Merge pull request #15 from sdarwin/githubactions --add_boost_module_headers(NAME "static_assert" SRC_DIR ${BOOST_STATIC_ASSERT_SOURCE_DIR} GIT_TAG ${BOOST_STATIC_ASSERT_GIT_TAG}) -- --set(BOOST_THROW_EXCEPTION_GIT_TAG 23dd41e920ecd91237500ac6428f7d392a7a875c) --# Author: Peter Dimov --# Date: Sun Jun 25 16:12:57 2023 +0300 --# --# Update ci.yml --add_boost_module_headers(NAME "throw_exception" SRC_DIR ${BOOST_THROW_EXCEPTION_SOURCE_DIR} GIT_TAG ${BOOST_THROW_EXCEPTION_GIT_TAG}) -- --set(BOOST_TUPLE_GIT_TAG 500e4fa0a2845b96c0dd919e7485e0f216438a01) --# Merge: aa16ae3 ded3c1d --# Author: Joel de Guzman --# Date: Thu Dec 30 23:20:18 2021 +0800 --# --# Merge pull request #21 from igaztanaga/patch-1 --add_boost_module_headers(NAME "tuple" SRC_DIR ${BOOST_TUPLE_SOURCE_DIR} GIT_TAG ${BOOST_TUPLE_GIT_TAG}) -- --set(BOOST_TYPE_TRAITS_GIT_TAG 89f5011b4a79d91e42735670e39f72cb25c86c72) --# Merge: 55feb75 1ebd31e --# Author: John Maddock --# Date: Fri Feb 24 18:02:30 2023 +0000 +-# Date: Tue Oct 31 20:24:41 2023 -0500 -# -# Merge branch 'develop' --add_boost_module_headers(NAME "type_traits" SRC_DIR ${BOOST_TYPE_TRAITS_SOURCE_DIR} GIT_TAG ${BOOST_TYPE_TRAITS_GIT_TAG}) -diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt -index ead8f2c83ab7..6fb8305a1a88 100644 ---- a/sycl/source/CMakeLists.txt -+++ b/sycl/source/CMakeLists.txt -@@ -69,8 +69,6 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) - target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) +-add_boost_module_headers(NAME "predef" SRC_DIR ${BOOST_PREDEF_SOURCE_DIR} GIT_TAG ${BOOST_PREDEF_GIT_TAG}) +- +-# Static assert is a dependency of core +-set(BOOST_STATIC_ASSERT_GIT_TAG ba72d3340f3dc6e773868107f35902292f84b07e) +-# Merge: 392e4e7 614546d +-# Author: Rene Rivera +-# Date: Tue Oct 31 20:24:41 2023 -0500 +-# +-# Merge branch 'develop' +-add_boost_module_headers(NAME "static_assert" SRC_DIR ${BOOST_STATIC_ASSERT_SOURCE_DIR} GIT_TAG ${BOOST_STATIC_ASSERT_GIT_TAG}) +- +-set(BOOST_THROW_EXCEPTION_GIT_TAG 7c8ec2114bc1f9ab2a8afbd629b96fbdd5901294) +-# Author: Peter Dimov +-# Date: Sat Jan 6 19:41:56 2024 +0200 +-# +-# Add -Wundef to test/Jamfile +-add_boost_module_headers(NAME "throw_exception" SRC_DIR ${BOOST_THROW_EXCEPTION_SOURCE_DIR} GIT_TAG ${BOOST_THROW_EXCEPTION_GIT_TAG}) +diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake +index 41268945b078..895065b8f1a7 100644 +--- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake ++++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake +@@ -128,11 +128,11 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT) + # to link statically on windows + if(WIN32) + set(UMF_BUILD_SHARED_LIBRARY OFF CACHE INTERNAL "Build UMF shared library") +- set(UMF_LINK_HWLOC_STATICALLY ON CACHE INTERNAL "static HWLOC") +- else() +- set(UMF_DISABLE_HWLOC ${SYCL_UMF_DISABLE_HWLOC} CACHE INTERNAL "Disable hwloc for UMF") endif() -- target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) -- - # pi_win_proxy_loader - if (WIN32) - include_directories(${LLVM_EXTERNAL_SYCL_SOURCE_DIR}/pi_win_proxy_loader) ++ set(UMF_LINK_HWLOC_STATICALLY OFF CACHE INTERNAL "static HWLOC") ++ set(UMF_DISABLE_HWLOC ${SYCL_UMF_DISABLE_HWLOC} CACHE INTERNAL "Disable hwloc for UMF") ++ + fetch_adapter_source(level_zero + ${UNIFIED_RUNTIME_REPO} + ${UNIFIED_RUNTIME_TAG} +diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp +index 454a43fe9953..50fa14fbbd5d 100644 +--- a/sycl/source/detail/graph_impl.hpp ++++ b/sycl/source/detail/graph_impl.hpp +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + + namespace sycl { + inline namespace _V1 { +diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp +index 2a128ba9a901..db8faf6e188f 100644 +--- a/sycl/source/detail/kernel_bundle_impl.hpp ++++ b/sycl/source/detail/kernel_bundle_impl.hpp +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #include + + #include "split_string.hpp" diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp -index 87a41d9fe105..c0a572b4d144 100644 +index f170b55a6348..9f593359c7f3 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp -@@ -18,12 +18,10 @@ +@@ -19,12 +19,10 @@ #include #include @@ -212,8 +216,8 @@ index 87a41d9fe105..c0a572b4d144 100644 // For testing purposes class MockKernelProgramCache; -@@ -113,8 +111,8 @@ public: - std::pair; +@@ -123,8 +121,8 @@ public: + using CommonProgramKeyT = std::pair; struct ProgramCache { - ::boost::unordered_map Cache; @@ -223,23 +227,22 @@ index 87a41d9fe105..c0a572b4d144 100644 size_t size() const noexcept { return Cache.size(); } }; -@@ -138,10 +136,8 @@ public: +@@ -152,23 +150,15 @@ public: }; using KernelBuildResultPtr = std::shared_ptr; - using KernelByNameT = - ::boost::unordered_map; - using KernelCacheT = -- ::boost::unordered_map; +- ::boost::unordered_map; + using KernelByNameT = std::map; -+ using KernelCacheT = std::map; ++ using KernelCacheT = std::map; using KernelFastCacheKeyT = - std::tuple; using KernelFastCacheValT = - std::tuple; + std::tuple; - // This container is used as a fast path for retrieving cached kernels. - // unordered_flat_map is used here to reduce lookup overhead. - // The slow path is used only once for each newly created kernel, so the @@ -252,7 +255,7 @@ index 87a41d9fe105..c0a572b4d144 100644 ~KernelProgramCache() = default; diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt -index 71d2413c2974..0c55c870c4b8 100644 +index ec740f913ed4..c0e04bfaf119 100644 --- a/sycl/unittests/CMakeLists.txt +++ b/sycl/unittests/CMakeLists.txt @@ -1,6 +1,5 @@ @@ -262,6 +265,3 @@ index 71d2413c2974..0c55c870c4b8 100644 foreach(flag_var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE --- -2.30.1.windows.1 - diff --git a/build_files/build_environment/patches/dpcpp_13328.diff b/build_files/build_environment/patches/dpcpp_13328.diff deleted file mode 100644 index 2f880818a30..00000000000 --- a/build_files/build_environment/patches/dpcpp_13328.diff +++ /dev/null @@ -1,12 +0,0 @@ -diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt -index bfeb4a763da84..94400f08154a2 100644 ---- a/clang/lib/Driver/CMakeLists.txt -+++ b/clang/lib/Driver/CMakeLists.txt -@@ -98,6 +98,7 @@ add_clang_library(clangDriver - - DEPENDS - ClangDriverOptions -+ DeviceConfigFile - - LINK_LIBS - clangBasic diff --git a/build_files/build_environment/patches/dpcpp_15124.diff b/build_files/build_environment/patches/dpcpp_15124.diff new file mode 100644 index 00000000000..8993d795962 --- /dev/null +++ b/build_files/build_environment/patches/dpcpp_15124.diff @@ -0,0 +1,1036 @@ +diff --git a/buildbot/configure.py b/buildbot/configure.py +index 692a64fd3125..424df77c2513 100644 +--- a/buildbot/configure.py ++++ b/buildbot/configure.py +@@ -178,6 +178,8 @@ def do_configure(args): + "-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects), + "-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform), + "-DLLVM_BUILD_TOOLS=ON", ++ "-DLLVM_ENABLE_ZSTD=ON", ++ "-DLLVM_USE_STATIC_ZSTD=ON", + "-DSYCL_ENABLE_WERROR={}".format(sycl_werror), + "-DCMAKE_INSTALL_PREFIX={}".format(install_dir), + "-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests. +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 17dd75a265a8..8472efa23fdf 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -10043,6 +10043,19 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, + SmallString<128> TargetTripleOpt = TT.getArchName(); + bool WrapFPGADevice = false; + bool FPGAEarly = false; ++ ++ // Validate and propogate CLI options related to device image compression. ++ // -offload-compress ++ if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) { ++ WrapperArgs.push_back( ++ C.getArgs().MakeArgString(Twine("-offload-compress"))); ++ // -offload-compression-level=<> ++ if (Arg *A = C.getInputArgs().getLastArg( ++ options::OPT_offload_compression_level_EQ)) ++ WrapperArgs.push_back(C.getArgs().MakeArgString( ++ Twine("-offload-compression-level=") + A->getValue())); ++ } ++ + if (Arg *A = C.getInputArgs().getLastArg(options::OPT_fsycl_link_EQ)) { + WrapFPGADevice = true; + FPGAEarly = (A->getValue() == StringRef("early")); +diff --git a/clang/test/Driver/clang-offload-wrapper-zstd.c b/clang/test/Driver/clang-offload-wrapper-zstd.c +new file mode 100644 +index 000000000000..bc5fadfc4cf4 +--- /dev/null ++++ b/clang/test/Driver/clang-offload-wrapper-zstd.c +@@ -0,0 +1,40 @@ ++// REQUIRES: zstd && (system-windows || system-linux) ++ ++// clang-offload-wrapper compression test: checks that the wrapper can compress the device images. ++// Checks the '--offload-compress', '--offload-compression-level', and '--offload-compression-threshold' ++// CLI options. ++ ++// --- Prepare test data by creating the debice binary image. ++// RUN: echo -e -n 'device binary image1\n' > %t.bin ++// RUN: echo -e -n '[Category1]\nint_prop1=1|10\n[Category2]\nint_prop2=1|20\n' > %t.props ++// RUN: echo -e -n 'kernel1\nkernel2\n' > %t.sym ++// RUN: echo -e -n 'Manifest file - arbitrary data generated by the toolchain\n' > %t.mnf ++// RUN: echo '[Code|Properties|Symbols|Manifest]' > %t.img1 ++// RUN: echo %t.bin"|"%t.props"|"%t.sym"|"%t.mnf >> %t.img1 ++ ++/////////////////////////////////////////////////////// ++// Compress the test image using clang-offload-wrapper. ++/////////////////////////////////////////////////////// ++ ++// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ ++// RUN: --offload-compress --offload-compression-level=9 --offload-compression-threshold=0 \ ++// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS ++ ++// CHECK-COMPRESS: [Compression] Original image size: ++// CHECK-COMPRESS: [Compression] Compressed image size: ++// CHECK-COMPRESS: [Compression] Compression level used: 9 ++ ++/////////////////////////////////////////////////////////// ++// Check that there is no compression when the threshold is set to a value higher than the image size ++// or '--offload-compress' is not set. ++/////////////////////////////////////////////////////////// ++ ++// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ ++// RUN: --offload-compress --offload-compression-level=3 --offload-compression-threshold=1000 \ ++// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS ++ ++// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \ ++// RUN: --offload-compression-level=3 --offload-compression-threshold=0 \ ++// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS ++ ++// CHECK-NO-COMPRESS-NOT: [Compression] Original image size: +diff --git a/clang/test/Driver/sycl-offload-wrapper-compression.cpp b/clang/test/Driver/sycl-offload-wrapper-compression.cpp +new file mode 100644 +index 000000000000..1ef9282ee359 +--- /dev/null ++++ b/clang/test/Driver/sycl-offload-wrapper-compression.cpp +@@ -0,0 +1,14 @@ ++/// ++/// Check if '--offload-compress' and '--offload-compression-level' CLI ++/// options are passed to the clang-offload-wrapper. ++/// ++ ++// RUN: %clangxx -### -fsycl --offload-compress --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS ++// CHECK-COMPRESS: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}}"-offload-compression-level=3"{{.*}} ++ ++// Make sure that the compression options are not passed when --offload-compress is not set. ++// RUN: %clangxx -### -fsycl %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS ++// RUN: %clangxx -### -fsycl --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS ++ ++// CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}} ++// CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compression-level=3"{{.*}} +diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt +index 9cb5ec66c644..3195f18fe23c 100644 +--- a/clang/tools/clang-offload-wrapper/CMakeLists.txt ++++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt +@@ -10,6 +10,7 @@ add_clang_tool(clang-offload-wrapper + + set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS + clangBasic ++ LLVMSupport + ) + + add_dependencies(clang clang-offload-wrapper) +diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +index 5facbb4329ab..6ce4c5dc36ab 100644 +--- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp ++++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +@@ -67,6 +67,9 @@ + #include + #include + ++// For device image compression. ++#include ++ + #define OPENMP_OFFLOAD_IMAGE_VERSION "1.0" + + using namespace llvm; +@@ -139,6 +142,25 @@ static cl::list Inputs(cl::Positional, cl::OneOrMore, + cl::desc(""), + cl::cat(ClangOffloadWrapperCategory)); + ++// CLI options for device image compression. ++static cl::opt OffloadCompressDevImgs( ++ "offload-compress", cl::init(false), cl::Optional, ++ cl::desc("Enable device image compression using ZSTD."), ++ cl::cat(ClangOffloadWrapperCategory)); ++ ++static cl::opt ++ OffloadCompressLevel("offload-compression-level", cl::init(10), ++ cl::Optional, ++ cl::desc("ZSTD Compression level. Default: 10"), ++ cl::cat(ClangOffloadWrapperCategory)); ++ ++static cl::opt ++ OffloadCompressThreshold("offload-compression-threshold", cl::init(512), ++ cl::Optional, ++ cl::desc("Threshold (in bytes) over which to " ++ "compress images. Default: 512"), ++ cl::cat(ClangOffloadWrapperCategory)); ++ + // Binary image formats supported by this tool. The support basically means + // mapping string representation given at the command line to a value from this + // enum. No format checking is performed. +@@ -146,8 +168,9 @@ enum BinaryImageFormat { + none, // image kind is not determined + native, // image kind is native + // portable image kinds go next +- spirv, // SPIR-V +- llvmbc // LLVM bitcode ++ spirv, // SPIR-V ++ llvmbc, // LLVM bitcode ++ compressed_none // compressed image with unknown format + }; + + /// Sets offload kind. +@@ -265,6 +288,8 @@ static StringRef formatToString(BinaryImageFormat Fmt) { + return "llvmbc"; + case BinaryImageFormat::native: + return "native"; ++ case BinaryImageFormat::compressed_none: ++ return "compressed_none"; + } + llvm_unreachable("bad format"); + +@@ -1083,10 +1108,66 @@ private: + return FBinOrErr.takeError(); + Fbin = *FBinOrErr; + } else { +- Fbin = addDeviceImageToModule( +- ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), +- Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, +- Img.Tgt); ++ ++ // If '--offload-compress' option is specified and zstd is not ++ // available, throw an error. ++ if (OffloadCompressDevImgs && !llvm::compression::zstd::isAvailable()) { ++ return createStringError( ++ inconvertibleErrorCode(), ++ "'--offload-compress' option is specified but zstd " ++ "is not available. The device image will not be " ++ "compressed."); ++ } ++ ++ // Don't compress if the user explicitly specifies the binary image ++ // format or if the image is smaller than OffloadCompressThreshold ++ // bytes. ++ if (Kind != OffloadKind::SYCL || !OffloadCompressDevImgs || ++ Img.Fmt != BinaryImageFormat::none || ++ !llvm::compression::zstd::isAvailable() || ++ static_cast(Bin->getBufferSize()) < OffloadCompressThreshold) { ++ Fbin = addDeviceImageToModule( ++ ArrayRef(Bin->getBufferStart(), Bin->getBufferSize()), ++ Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, ++ Img.Tgt); ++ } else { ++ ++ // Compress the image using zstd. ++ SmallVector CompressedBuffer; ++#if LLVM_ENABLE_EXCEPTIONS ++ try { ++#endif ++ llvm::compression::zstd::compress( ++ ArrayRef( ++ (const unsigned char *)(Bin->getBufferStart()), ++ Bin->getBufferSize()), ++ CompressedBuffer, OffloadCompressLevel); ++#if LLVM_ENABLE_EXCEPTIONS ++ } catch (const std::exception &ex) { ++ return createStringError(inconvertibleErrorCode(), ++ std::string("Failed to compress the device image: \n") + ++ std::string(ex.what())); ++ } ++#endif ++ if (Verbose) ++ errs() << "[Compression] Original image size: " ++ << Bin->getBufferSize() << "\n" ++ << "[Compression] Compressed image size: " ++ << CompressedBuffer.size() << "\n" ++ << "[Compression] Compression level used: " ++ << OffloadCompressLevel << "\n"; ++ ++ // Add the compressed image to the module. ++ Fbin = addDeviceImageToModule( ++ ArrayRef((const char *)CompressedBuffer.data(), ++ CompressedBuffer.size()), ++ Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind, ++ Img.Tgt); ++ ++ // Change image format to compressed_none. ++ Ffmt = ConstantInt::get(Type::getInt8Ty(C), ++ BinaryImageFormat::compressed_none); ++ } + } + + if (Kind == OffloadKind::SYCL) { +diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md +index 6a9e12882518..3f184edc12de 100644 +--- a/sycl/doc/UsersManual.md ++++ b/sycl/doc/UsersManual.md +@@ -195,6 +195,19 @@ and not recommended to use in production environment. + which may or may not perform additional inlining. + Default value is 225. + ++**`--offload-compress`** ++ ++ Enables device image compression for SYCL offloading. Device images ++ are compressed using `zstd` compression algorithm and only if their size ++ exceeds 512 bytes. ++ Default value is false. ++ ++**`--offload-compression-level=`** ++ ++ `zstd` compression level used to compress device images when `--offload- ++ compress` is enabled. ++ The default value is 10. ++ + ## Target toolchain options + + **`-Xsycl-target-backend= "options"`** +diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt +index f0067a45b20a..be683124f7f9 100644 +--- a/sycl/source/CMakeLists.txt ++++ b/sycl/source/CMakeLists.txt +@@ -69,6 +69,13 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME) + target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB}) + endif() + ++ if (NOT LLVM_ENABLE_ZSTD) ++ target_compile_definitions(${LIB_OBJ_NAME} PRIVATE SYCL_RT_ZSTD_NOT_AVAIABLE) ++ else() ++ target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY}) ++ target_include_directories(${LIB_OBJ_NAME} PRIVATE ${zstd_INCLUDE_DIR}) ++ endif() ++ + target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS}) + + # ur_win_proxy_loader +diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp +index 9d2777e863ee..827ee61ef811 100644 +--- a/sycl/source/detail/compiler.hpp ++++ b/sycl/source/detail/compiler.hpp +@@ -115,7 +115,8 @@ enum sycl_device_binary_type : uint8_t { + SYCL_DEVICE_BINARY_TYPE_NONE = 0, // undetermined + SYCL_DEVICE_BINARY_TYPE_NATIVE = 1, // specific to a device + SYCL_DEVICE_BINARY_TYPE_SPIRV = 2, +- SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3 ++ SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3, ++ SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE = 4 + }; + + // Device binary descriptor version supported by this library. +diff --git a/sycl/source/detail/compression.hpp b/sycl/source/detail/compression.hpp +new file mode 100644 +index 000000000000..1878010cd5ba +--- /dev/null ++++ b/sycl/source/detail/compression.hpp +@@ -0,0 +1,153 @@ ++//==---------- compression.hpp --------------------------------------------===// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++#pragma once ++ ++#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE ++ ++#include ++ ++#include ++#include ++#include ++ ++#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) ++#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) ++ ++namespace sycl { ++inline namespace _V1 { ++namespace detail { ++ ++// Singleton class to handle ZSTD compression and decompression. ++class ZSTDCompressor { ++private: ++ ZSTDCompressor() {} ++ ++ ZSTDCompressor(const ZSTDCompressor &) = delete; ++ ZSTDCompressor &operator=(const ZSTDCompressor &) = delete; ++ ~ZSTDCompressor() {} ++ ++ // Get the singleton instance of the ZSTDCompressor class. ++ static ZSTDCompressor &GetSingletonInstance() { ++ static ZSTDCompressor instance; ++ return instance; ++ } ++ ++ // Public APIs ++public: ++ // Blob (de)compression do not assume format/structure of the input buffer. ++ // This function can be used in future for compression in on-disk cache. ++ static std::unique_ptr CompressBlob(const char *src, size_t srcSize, ++ size_t &dstSize, int level) { ++ auto &instance = GetSingletonInstance(); ++ ++ // Lazy initialize compression context. ++ if (!instance.m_ZSTD_compression_ctx) { ++ ++ // Call ZSTD_createCCtx() and ZSTD_freeCCtx() to create and free the ++ // context. ++ instance.m_ZSTD_compression_ctx = ++ std::unique_ptr(ZSTD_createCCtx(), ++ ZSTD_freeCCtx); ++ if (!instance.m_ZSTD_compression_ctx) { ++ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), ++ "Failed to create ZSTD compression context"); ++ } ++ } ++ ++ // Get maximum size of the compressed buffer and allocate it. ++ auto dstBufferSize = ZSTD_compressBound(srcSize); ++ auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); ++ ++ if (!dstBuffer) ++ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), ++ "Failed to allocate memory for compressed data"); ++ ++ // Compress the input buffer. ++ dstSize = ++ ZSTD_compressCCtx(instance.m_ZSTD_compression_ctx.get(), ++ static_cast(dstBuffer.get()), dstBufferSize, ++ static_cast(src), srcSize, level); ++ ++ // Store the error code if compression failed. ++ if (ZSTD_isError(dstSize)) ++ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), ++ ZSTD_getErrorName(dstSize)); ++ ++ // Pass ownership of the buffer to the caller. ++ return dstBuffer; ++ } ++ ++ static size_t GetDecompressedSize(const char *src, size_t srcSize) { ++ size_t dstBufferSize = ZSTD_getFrameContentSize(src, srcSize); ++ ++ if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN || ++ dstBufferSize == ZSTD_CONTENTSIZE_ERROR) { ++ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), ++ "Error determining size of uncompressed data."); ++ } ++ return dstBufferSize; ++ } ++ ++ static std::unique_ptr DecompressBlob(const char *src, size_t srcSize, ++ size_t &dstSize) { ++ auto &instance = GetSingletonInstance(); ++ ++ // Lazy initialize decompression context. ++ if (!instance.m_ZSTD_decompression_ctx) { ++ ++ // Call ZSTD_createDCtx() and ZSTD_freeDCtx() to create and free the ++ // context. ++ instance.m_ZSTD_decompression_ctx = ++ std::unique_ptr(ZSTD_createDCtx(), ++ ZSTD_freeDCtx); ++ if (!instance.m_ZSTD_decompression_ctx) { ++ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), ++ "Failed to create ZSTD decompression context"); ++ } ++ } ++ ++ // Size of decompressed image can be larger than what we can allocate ++ // on heap. In that case, we need to use streaming decompression. ++ auto dstBufferSize = GetDecompressedSize(src, srcSize); ++ ++ // Allocate buffer for decompressed data. ++ auto dstBuffer = std::unique_ptr(new char[dstBufferSize]); ++ ++ if (!dstBuffer) ++ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), ++ "Failed to allocate memory for decompressed data"); ++ ++ dstSize = ++ ZSTD_decompressDCtx(instance.m_ZSTD_decompression_ctx.get(), ++ static_cast(dstBuffer.get()), dstBufferSize, ++ static_cast(src), srcSize); ++ ++ // In case of decompression error, return the error message and set dstSize ++ // to 0. ++ if (ZSTD_isError(dstSize)) { ++ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), ++ ZSTD_getErrorName(dstSize)); ++ } ++ ++ // Pass ownership of the buffer to the caller. ++ return dstBuffer; ++ } ++ ++ // Data fields ++private: ++ // ZSTD contexts. Reusing ZSTD context speeds up subsequent (de)compression. ++ std::unique_ptr m_ZSTD_compression_ctx{ ++ nullptr, nullptr}; ++ std::unique_ptr m_ZSTD_decompression_ctx{ ++ nullptr, nullptr}; ++}; ++} // namespace detail ++} // namespace _V1 ++} // namespace sycl ++ ++#endif // SYCL_RT_ZSTD_NOT_AVAIABLE +diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp +index beb9bae0dd0f..2be48d4a38fc 100644 +--- a/sycl/source/detail/device_binary_image.cpp ++++ b/sycl/source/detail/device_binary_image.cpp +@@ -9,6 +9,9 @@ + #include + #include + ++// For device image compression. ++#include ++ + #include + #include + #include +@@ -167,6 +170,8 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) { + // it when invoking the offload wrapper job + Format = static_cast(Bin->Format); + ++ // For compressed images, we delay determining the format until the image is ++ // decompressed. + if (Format == SYCL_DEVICE_BINARY_TYPE_NONE) + // try to determine the format; may remain "NONE" + Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); +@@ -226,6 +231,48 @@ DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() { + Bin = nullptr; + } + ++#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE ++CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage( ++ sycl_device_binary CompressedBin) ++ : RTDeviceBinaryImage() { ++ ++ // 'CompressedBin' is part of the executable image loaded into memory ++ // which can't be modified easily. So, we need to make a copy of it. ++ Bin = new sycl_device_binary_struct(*CompressedBin); ++ ++ // Get the decompressed size of the binary image. ++ m_ImageSize = ZSTDCompressor::GetDecompressedSize( ++ reinterpret_cast(Bin->BinaryStart), ++ static_cast(Bin->BinaryEnd - Bin->BinaryStart)); ++ ++ init(Bin); ++} ++ ++void CompressedRTDeviceBinaryImage::Decompress() { ++ ++ size_t CompressedDataSize = ++ static_cast(Bin->BinaryEnd - Bin->BinaryStart); ++ ++ size_t DecompressedSize = 0; ++ m_DecompressedData = ZSTDCompressor::DecompressBlob( ++ reinterpret_cast(Bin->BinaryStart), CompressedDataSize, ++ DecompressedSize); ++ ++ Bin->BinaryStart = ++ reinterpret_cast(m_DecompressedData.get()); ++ Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize; ++ ++ Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize()); ++ Format = static_cast(Bin->Format); ++} ++ ++CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() { ++ // De-allocate device binary struct. ++ delete Bin; ++ Bin = nullptr; ++} ++#endif // SYCL_RT_ZSTD_NOT_AVAIABLE ++ + } // namespace detail + } // namespace _V1 + } // namespace sycl +diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp +index 49047a04ae77..203427b89ca4 100644 +--- a/sycl/source/detail/device_binary_image.hpp ++++ b/sycl/source/detail/device_binary_image.hpp +@@ -7,12 +7,12 @@ + //===----------------------------------------------------------------------===// + #pragma once + ++#include "ur_utils.hpp" + #include + #include + #include + #include + #include +-#include "ur_utils.hpp" + + #include + +@@ -158,7 +158,10 @@ public: + virtual void print() const; + virtual void dump(std::ostream &Out) const; + +- size_t getSize() const { ++ // getSize will be overridden in the case of compressed binary images. ++ // In that case, we return the size of uncompressed data, instead of ++ // BinaryEnd - BinaryStart. ++ virtual size_t getSize() const { + assert(Bin && "binary image data not set"); + return static_cast(Bin->BinaryEnd - Bin->BinaryStart); + } +@@ -276,6 +279,35 @@ protected: + std::unique_ptr Data; + }; + ++#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE ++// Compressed device binary image. Decompression happens when the image is ++// actually used to build a program. ++// Also, frees the decompressed data in destructor. ++class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage { ++public: ++ CompressedRTDeviceBinaryImage(sycl_device_binary Bin); ++ ~CompressedRTDeviceBinaryImage() override; ++ ++ void Decompress(); ++ ++ // We return the size of decompressed data, not the size of compressed data. ++ size_t getSize() const override { ++ assert(Bin && "binary image data not set"); ++ return m_ImageSize; ++ } ++ ++ bool IsCompressed() const { return m_DecompressedData.get() == nullptr; } ++ void print() const override { ++ RTDeviceBinaryImage::print(); ++ std::cerr << " COMPRESSED\n"; ++ } ++ ++private: ++ std::unique_ptr m_DecompressedData; ++ size_t m_ImageSize; ++}; ++#endif // SYCL_RT_ZSTD_NOT_AVAIABLE ++ + } // namespace detail + } // namespace _V1 + } // namespace sycl +diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp +index 01e567fb03c3..e7357a45d117 100644 +--- a/sycl/source/detail/program_manager/program_manager.cpp ++++ b/sycl/source/detail/program_manager/program_manager.cpp +@@ -174,6 +174,8 @@ isDeviceBinaryTypeSupported(const context &C, + return "SPIR-V"; + case SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE: + return "LLVM IR"; ++ case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE: ++ return "compressed none"; + } + assert(false && "Unknown device image format"); + return "unknown"; +@@ -721,6 +723,14 @@ setSpecializationConstants(const std::shared_ptr &InputImpl, + } + } + ++static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { ++#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE ++ if (auto CompImg = dynamic_cast(Img)) ++ if (CompImg->IsCompressed()) ++ CompImg->Decompress(); ++#endif ++} ++ + // When caching is enabled, the returned UrProgram will already have + // its ref count incremented. + ur_program_handle_t ProgramManager::getBuiltURProgram( +@@ -773,6 +783,10 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( + collectDeviceImageDepsForImportedSymbols(Img, Device); + DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); + ++ // Decompress all DeviceImagesToLink ++ for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) ++ CheckAndDecompressImage(BinImg); ++ + std::vector AllImages; + AllImages.reserve(ImageDeps.size() + 1); + AllImages.push_back(&Img); +@@ -1317,6 +1331,10 @@ ProgramManager::getDeviceImage(const std::string &KernelName, + Device); + } + } ++ ++ // Decompress the image if it is compressed. ++ CheckAndDecompressImage(Img); ++ + if (Img) { + CheckJITCompilationForImage(Img, JITCompilationIsRequired); + +@@ -1458,6 +1476,13 @@ getDeviceLibPrograms(const ContextImplPtr Context, + return Programs; + } + ++// Check if device image is compressed. ++static inline bool isDeviceImageCompressed(sycl_device_binary Bin) { ++ ++ auto currFormat = static_cast(Bin->Format); ++ return currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE; ++} ++ + ProgramManager::ProgramPtr ProgramManager::build( + ProgramPtr Program, const ContextImplPtr Context, + const std::string &CompileOptions, const std::string &LinkOptions, +@@ -1583,7 +1608,19 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { + if (EntriesB == EntriesE) + continue; + +- auto Img = std::make_unique(RawImg); ++ std::unique_ptr Img; ++ if (isDeviceImageCompressed(RawImg)) ++#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE ++ Img = std::make_unique(RawImg); ++#else ++ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime), ++ "Recieved a compressed device image, but " ++ "SYCL RT was built without ZSTD support." ++ "Aborting. "); ++#endif ++ else ++ Img = std::make_unique(RawImg); ++ + static uint32_t SequenceID = 0; + + // Fill the kernel argument mask map +@@ -1620,6 +1657,10 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) { + [&](auto &CurrentImg) { + return CurrentImg.first->getFormat() == Img->getFormat(); + }); ++ ++ // Check if image is compressed, and decompress it before dumping. ++ CheckAndDecompressImage(Img.get()); ++ + dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0); + } + +@@ -2097,6 +2138,9 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( + + auto &[KernelImagesState, KernelImages] = *StateImagesPair; + ++ // Check if device image is compressed and decompress it if needed ++ CheckAndDecompressImage(BinImage); ++ + if (KernelImages.empty()) { + KernelImagesState = ImgState; + KernelImages.push_back(BinImage); +diff --git a/sycl/test-e2e/Compression/Inputs/single_kernel.cpp b/sycl/test-e2e/Compression/Inputs/single_kernel.cpp +new file mode 100644 +index 000000000000..eac8a63438f8 +--- /dev/null ++++ b/sycl/test-e2e/Compression/Inputs/single_kernel.cpp +@@ -0,0 +1,17 @@ ++#include ++ ++int main() { ++ ++ sycl::queue q0; ++ int val = -1; ++ { ++ sycl::buffer buffer1(&val, sycl::range(1)); ++ ++ q0.submit([&](sycl::handler &cgh) { ++ auto acc = sycl::accessor(buffer1, cgh); ++ cgh.single_task([=] { acc[0] = acc[0] + 1; }); ++ }).wait(); ++ } ++ ++ return !(val == 0); ++} +diff --git a/sycl/test-e2e/Compression/compression.cpp b/sycl/test-e2e/Compression/compression.cpp +new file mode 100644 +index 000000000000..1d8da7abc9d4 +--- /dev/null ++++ b/sycl/test-e2e/Compression/compression.cpp +@@ -0,0 +1,7 @@ ++// End-to-End test for testing device image compression. ++// REQUIRES: zstd ++// RUN: %{build} -O0 -g %S/Inputs/single_kernel.cpp -o %t_not_compress.out ++// RUN: %{build} -O0 -g --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out ++// RUN: %{run} %t_not_compress.out ++// RUN: %{run} %t_compress.out ++// RUN: not diff %t_not_compress.out %t_compress.out +diff --git a/sycl/test-e2e/Compression/compression_aot.cpp b/sycl/test-e2e/Compression/compression_aot.cpp +new file mode 100644 +index 000000000000..5b44b6a41e9c +--- /dev/null ++++ b/sycl/test-e2e/Compression/compression_aot.cpp +@@ -0,0 +1,5 @@ ++// End-to-End test for testing device image compression in AOT. ++// REQUIRES: zstd, opencl-aot, cpu ++ ++// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -O0 --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out ++// RUN: %{run} %t_compress.out +diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp +new file mode 100644 +index 000000000000..72eb3f090479 +--- /dev/null ++++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp +@@ -0,0 +1,56 @@ ++// End-to-End test for testing device image compression when we have two ++// translation units, one compressed and one not compressed. ++// REQUIRES: zstd, linux ++ ++// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared -fPIC -o %T/kernel1.so ++// RUN: %{build} -DENABLE_KERNEL2 -shared -fPIC -o %T/kernel2.so ++ ++// RUN: %{build} %T/kernel1.so %T/kernel2.so -o %t_compress.out ++// RUN: %{run} %t_compress.out ++#if defined(ENABLE_KERNEL1) || defined(ENABLE_KERNEL2) ++#include ++#include ++using namespace sycl; ++#endif ++ ++#ifdef ENABLE_KERNEL1 ++void kernel1() { ++ int data = -1; ++ { ++ buffer b(&data, range(1)); ++ queue q; ++ q.submit([&](sycl::handler &cgh) { ++ auto acc = accessor(b, cgh); ++ cgh.single_task([=] { acc[0] = abs(acc[0]); }); ++ }); ++ } ++ assert(data == 1); ++} ++#endif ++ ++#ifdef ENABLE_KERNEL2 ++void kernel2() { ++ int data = -2; ++ { ++ buffer b(&data, range(1)); ++ queue q; ++ q.submit([&](sycl::handler &cgh) { ++ auto acc = accessor(b, cgh); ++ cgh.single_task([=] { acc[0] = abs(acc[0]); }); ++ }); ++ } ++ assert(data == 2); ++} ++#endif ++ ++#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2) ++void kernel1(); ++void kernel2(); ++ ++int main() { ++ kernel1(); ++ kernel2(); ++ ++ return 0; ++} ++#endif +diff --git a/sycl/test-e2e/Compression/compression_separate_compile.cpp b/sycl/test-e2e/Compression/compression_separate_compile.cpp +new file mode 100644 +index 000000000000..9e47bbebdc87 +--- /dev/null ++++ b/sycl/test-e2e/Compression/compression_separate_compile.cpp +@@ -0,0 +1,70 @@ ++// End-to-End test for testing device image compression when we ++// seperatly compile and link device images. ++ ++// REQUIRES: zstd, opencl-aot, cpu, linux ++ ++////////////////////// Compile device images ++// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL1' -DENABLE_KERNEL1 -c %s -o %t_kernel1_aot.o ++// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL2' -DENABLE_KERNEL2 -c %s -o %t_kernel2_aot.o ++ ++////////////////////// Link device images ++// RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v ++ ++////////////////////// Compile the host program ++// RUN: %clangxx -fsycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o ++ ++////////////////////// Link the host program and compressed device images ++// RUN: %clangxx -fsycl %t_main.o %t_kernel1_aot.o %t_kernel2_aot.o %t_compressed_image.o -o %t_compress.out ++ ++// RUN: %{run} %t_compress.out ++ ++#include ++ ++using namespace sycl; ++ ++// Kernel 1 ++#ifdef ENABLE_KERNEL1 ++class test_kernel1; ++void run_kernel1(int *a, queue q) { ++ q.single_task([=]() { *a *= 3; }).wait(); ++} ++#endif ++ ++// Kernel 2 ++#ifdef ENABLE_KERNEL2 ++class test_kernel2; ++void run_kernel2(int *a, queue q) { ++ q.single_task([=]() { *a += 42; }).wait(); ++} ++#endif ++ ++// Main application. ++#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2) ++#include ++#include ++ ++#include ++ ++class kernel_init; ++void run_kernel1(int *a, queue q); ++void run_kernel2(int *a, queue q); ++int main() { ++ int retCode = 0; ++ queue q; ++ ++ if (!q.get_device().get_info()) ++ return 0; ++ ++ int *p = malloc_shared(1, q); ++ *p = 42; ++ ++ run_kernel1(p, q); ++ run_kernel2(p, q); ++ q.wait(); ++ ++ retCode = *p != (42 * 3 + 42); ++ ++ free(p, q); ++ return retCode; ++} ++#endif +diff --git a/sycl/test-e2e/Compression/no_zstd_warning.cpp b/sycl/test-e2e/Compression/no_zstd_warning.cpp +new file mode 100644 +index 000000000000..8a4460f9b864 +--- /dev/null ++++ b/sycl/test-e2e/Compression/no_zstd_warning.cpp +@@ -0,0 +1,4 @@ ++// using --offload-compress without zstd should throw an error. ++// REQUIRES: !zstd ++// RUN: not %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s ++// CHECK: '--offload-compress' option is specified but zstd is not available. The device image will not be compressed. +diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py +index c3a68f45bfef..961bc79307ac 100644 +--- a/sycl/test-e2e/lit.cfg.py ++++ b/sycl/test-e2e/lit.cfg.py +@@ -295,6 +295,18 @@ sp = subprocess.getstatusoutput( + if sp[0] == 0: + config.available_features.add("preview-breaking-changes-supported") + ++# Check if clang is built with ZSTD and compression support. ++fPIC_opt = "-fPIC" if platform.system() != "Windows" else "" ++ps = subprocess.Popen( ++ [config.dpcpp_compiler, "-fsycl", "--offload-compress", "-shared", fPIC_opt, "-x", "c++", "-", "-o", "-"], ++ stdin=subprocess.PIPE, ++ stdout=subprocess.DEVNULL, ++ stderr=subprocess.PIPE, ++) ++op = ps.communicate(input=b"") ++if ps.wait() == 0: ++ config.available_features.add("zstd") ++ + # Check for CUDA SDK + check_cuda_file = "cuda_include.cpp" + with open_check_file(check_cuda_file) as fp: +diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt +index c0e04bfaf119..9ae7f4d79fb6 100644 +--- a/sycl/unittests/CMakeLists.txt ++++ b/sycl/unittests/CMakeLists.txt +@@ -52,6 +52,12 @@ add_subdirectory(accessor) + add_subdirectory(handler) + add_subdirectory(builtins) + add_subdirectory(buffer/l0_specific) ++ ++# Enable compression unit-tests only if zstd is present. ++if (LLVM_ENABLE_ZSTD) ++ add_subdirectory(compression) ++endif() ++ + # TODO Enable xpti tests for Windows + if (NOT WIN32) + add_subdirectory(xpti_trace) +diff --git a/sycl/unittests/compression/CMakeLists.txt b/sycl/unittests/compression/CMakeLists.txt +new file mode 100644 +index 000000000000..742e2d228072 +--- /dev/null ++++ b/sycl/unittests/compression/CMakeLists.txt +@@ -0,0 +1,3 @@ ++add_sycl_unittest(CompressionTests OBJECT ++ CompressionTests.cpp ++) +diff --git a/sycl/unittests/compression/CompressionTests.cpp b/sycl/unittests/compression/CompressionTests.cpp +new file mode 100644 +index 000000000000..e9b50fa1cc2e +--- /dev/null ++++ b/sycl/unittests/compression/CompressionTests.cpp +@@ -0,0 +1,80 @@ ++//==------- CompressionTests.cpp --- compression unit test ----------------==// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++ ++#include ++ ++#include ++ ++#include ++ ++using namespace sycl::detail; ++ ++TEST(CompressionTest, SimpleCompression) { ++ ++ // Data to compress. ++ std::string data = "Hello World! I'm about to get compressed :P"; ++ size_t compressedDataSize = 0; ++ ++ auto compressedData = ZSTDCompressor::CompressBlob( ++ data.c_str(), data.size(), compressedDataSize, /*Compression level*/ 3); ++ ++ // Check if compression was successful. ++ EXPECT_NE(compressedData, nullptr); ++ EXPECT_GT(compressedDataSize, (size_t)0); ++ ++ // Decompress the data. ++ size_t decompressedSize = 0; ++ auto decompressedData = ZSTDCompressor::DecompressBlob( ++ compressedData.get(), compressedDataSize, decompressedSize); ++ ++ ASSERT_NE(decompressedData, nullptr); ++ ASSERT_GT(decompressedSize, (size_t)0); ++ ++ // Check if decompressed data is same as original data. ++ std::string decompressedStr((char *)decompressedData.get(), decompressedSize); ++ ASSERT_EQ(data, decompressedStr); ++} ++ ++// Test getting error code and error string. ++// Intentionally give incorrect input to decompress ++// to trigger an error. ++TEST(CompressionTest, NegativeErrorTest) { ++ std::string input = "Hello, World!"; ++ size_t decompressedSize = 0; ++ bool threwException = false; ++ try { ++ auto compressedData = ZSTDCompressor::DecompressBlob( ++ input.c_str(), input.size(), decompressedSize); ++ } catch (...) { ++ threwException = true; ++ } ++ ++ ASSERT_TRUE(threwException); ++} ++ ++// Test passing empty input to (de)compress. ++// There should be no error and the output should be empty. ++TEST(CompressionTest, EmptyInputTest) { ++ std::string input = ""; ++ size_t compressedSize = 0; ++ auto compressedData = ZSTDCompressor::CompressBlob( ++ input.c_str(), input.size(), compressedSize, 1); ++ ++ ASSERT_NE(compressedData, nullptr); ++ ASSERT_GT(compressedSize, (size_t)0); ++ ++ size_t decompressedSize = 0; ++ auto decompressedData = ZSTDCompressor::DecompressBlob( ++ compressedData.get(), compressedSize, decompressedSize); ++ ++ ASSERT_NE(decompressedData, nullptr); ++ ASSERT_EQ(decompressedSize, (size_t)0); ++ ++ std::string decompressedStr((char *)decompressedData.get(), decompressedSize); ++ ASSERT_EQ(input, decompressedStr); ++} diff --git a/build_files/build_environment/patches/level-zero.diff b/build_files/build_environment/patches/level-zero.diff index d28b3310d70..811f803c01f 100644 --- a/build_files/build_environment/patches/level-zero.diff +++ b/build_files/build_environment/patches/level-zero.diff @@ -1,29 +1,28 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt -index 7e27816..bd34055 100644 +index ca0746f..75d2890 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -81,13 +81,6 @@ if(MSVC) +@@ -92,13 +92,6 @@ if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /DYNAMICBASE") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DYNAMICBASE") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /guard:cf") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/DYNAMICBASE") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/guard:cf") - # enable Spectre Mitigation, not supported by clang-cl -- if(NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang) +- if((NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang) AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL IntelLLVM)) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qspectre") - endif() -- if(NOT CMAKE_C_COMPILER_ID STREQUAL Clang) +- if((NOT CMAKE_C_COMPILER_ID STREQUAL Clang) AND NOT (CMAKE_C_COMPILER_ID STREQUAL IntelLLVM)) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qspectre") - endif() endif() #CXX compiler support -@@ -128,7 +121,9 @@ if(MSVC) - set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") - - # enable CET shadow stack -- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /CETCOMPAT") -+ if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES ARM64) -+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /CETCOMPAT") -+ endif() +@@ -142,9 +135,6 @@ if(MSVC) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi") + set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/DEBUG ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/OPT:REF ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/OPT:ICF") +- # enable CET shadow stack +- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/CETCOMPAT") +- #Use of sccache with MSVC requires workaround of replacing /Zi with /Z7 #https://github.com/mozilla/sccache + if(USE_Z7) #sccache diff --git a/build_files/build_environment/patches/unifiedmemoryframework.diff b/build_files/build_environment/patches/unifiedmemoryframework.diff new file mode 100644 index 00000000000..efc4e447a07 --- /dev/null +++ b/build_files/build_environment/patches/unifiedmemoryframework.diff @@ -0,0 +1,13 @@ +# this otherwise leads to error C1258 due to too long path for XML log files +diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake +index 1d3e175..48770c4 100644 +--- a/cmake/helpers.cmake ++++ b/cmake/helpers.cmake +@@ -229,7 +229,6 @@ function(add_umf_target_compile_options name) + ${name} + PRIVATE /MD$<$:d> + $<$:/sdl> +- /analyze + /DYNAMICBASE + /W4 + /Gy diff --git a/build_files/build_environment/patches/unifiedruntime.diff b/build_files/build_environment/patches/unifiedruntime.diff new file mode 100644 index 00000000000..ab13aa68ca9 --- /dev/null +++ b/build_files/build_environment/patches/unifiedruntime.diff @@ -0,0 +1,13 @@ +# this is needed in order to be able to push textures to the host memory +diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp +index f68b2d93..90182568 100644 +--- a/source/adapters/level_zero/image.cpp ++++ b/source/adapters/level_zero/image.cpp +@@ -457,6 +457,7 @@ ur_result_t bindlessImagesCreateImpl(ur_context_handle_t hContext, + ZE2UR_CALL(zeContextMakeImageResident, + (hContext->ZeContext, hDevice->ZeDevice, ZeImage)); + } else if (MemAllocProperties.type == ZE_MEMORY_TYPE_DEVICE || ++ MemAllocProperties.type == ZE_MEMORY_TYPE_HOST || + MemAllocProperties.type == ZE_MEMORY_TYPE_SHARED) { + ZeStruct PitchedDesc; + PitchedDesc.ptr = reinterpret_cast(hImageMem); diff --git a/build_files/cmake/platform/platform_unix.cmake b/build_files/cmake/platform/platform_unix.cmake index fd4234580b4..1c7a13f69b4 100644 --- a/build_files/cmake/platform/platform_unix.cmake +++ b/build_files/cmake/platform/platform_unix.cmake @@ -424,9 +424,9 @@ if(DEFINED LIBDIR) ${SYCL_ROOT_DIR}/lib/libsycl.so.* ${SYCL_ROOT_DIR}/lib/libpi_*.so ${SYCL_ROOT_DIR}/lib/libur_*.so + ${SYCL_ROOT_DIR}/lib/libur_*.so.* ) list(FILTER _sycl_runtime_libraries EXCLUDE REGEX ".*\.py") - list(REMOVE_ITEM _sycl_runtime_libraries "${SYCL_ROOT_DIR}/lib/libpi_opencl.so") list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries}) unset(_sycl_runtime_libraries) endif() diff --git a/build_files/cmake/platform/platform_win32.cmake b/build_files/cmake/platform/platform_win32.cmake index e1728da9ce3..a4be8842d9a 100644 --- a/build_files/cmake/platform/platform_win32.cmake +++ b/build_files/cmake/platform/platform_win32.cmake @@ -1040,11 +1040,19 @@ if(WITH_CYCLES AND WITH_CYCLES_EMBREE) ) if(EMBREE_SYCL_SUPPORT) - set(EMBREE_LIBRARIES - ${EMBREE_LIBRARIES} - optimized ${LIBDIR}/embree/lib/embree4_sycl.lib - debug ${LIBDIR}/embree/lib/embree4_sycl_d.lib - ) + # MSVC debug version of embree may have been compiled without SYCL support + if(EXISTS ${LIBDIR}/embree/lib/embree4_sycl_d.lib) + set(EMBREE_LIBRARIES + ${EMBREE_LIBRARIES} + optimized ${LIBDIR}/embree/lib/embree4_sycl.lib + debug ${LIBDIR}/embree/lib/embree4_sycl_d.lib + ) + else() + set(EMBREE_LIBRARIES + ${EMBREE_LIBRARIES} + optimized ${LIBDIR}/embree/lib/embree4_sycl.lib + ) + endif() endif() if(EMBREE_STATIC_LIB) @@ -1069,11 +1077,19 @@ if(WITH_CYCLES AND WITH_CYCLES_EMBREE) ) if(EMBREE_SYCL_SUPPORT) - set(EMBREE_LIBRARIES - ${EMBREE_LIBRARIES} - optimized ${LIBDIR}/embree/lib/embree_rthwif.lib - debug ${LIBDIR}/embree/lib/embree_rthwif_d.lib - ) + # MSVC debug version of embree may have been compiled without SYCL support + if(EXISTS ${LIBDIR}/embree/lib/embree_rthwif_d.lib) + set(EMBREE_LIBRARIES + ${EMBREE_LIBRARIES} + optimized ${LIBDIR}/embree/lib/embree_rthwif.lib + debug ${LIBDIR}/embree/lib/embree_rthwif_d.lib + ) + else() + set(EMBREE_LIBRARIES + ${EMBREE_LIBRARIES} + optimized ${LIBDIR}/embree/lib/embree_rthwif.lib + ) + endif() endif() endif() endif()