Build: upgrade DPC++/Level-Zero to 6.0.0-rc1/1.19.2 releases

including a backport of device image compression:
https://github.com/intel/llvm/pull/15124 that can be enabled by adding
`--offload-compress` compiler option.
This commit is contained in:
Xavier Hallade
2024-12-12 15:58:55 +01:00
committed by Xavier Hallade
parent 7fb480095e
commit bdb093f58d
11 changed files with 1271 additions and 190 deletions

View File

@@ -46,6 +46,9 @@ set(DPCPP_EXTRA_ARGS
-DSYCL_PI_UR_USE_FETCH_CONTENT=OFF
-DSYCL_PI_UR_SOURCE_DIR=${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime/
-DFETCHCONTENT_SOURCE_DIR_UNIFIED-MEMORY-FRAMEWORK=${BUILD_DIR}/unifiedmemoryframework/src/external_unifiedmemoryframework/
-DSYCL_UMF_DISABLE_HWLOC=ON
-DUMF_DISABLE_HWLOC=ON
-DUMF_BUILD_SHARED_LIBRARY=OFF
# Below here is copied from an invocation of buildbot/config.py
-DLLVM_ENABLE_ASSERTIONS=ON
-DLLVM_TARGETS_TO_BUILD=X86
@@ -71,7 +74,7 @@ set(DPCPP_EXTRA_ARGS
-DXPTI_ENABLE_WERROR=OFF
-DSYCL_CLANG_EXTRA_FLAGS=
-DSYCL_ENABLE_PLUGINS=level_zero
-DSYCL_ENABLE_KERNEL_FUSION=OFF
-DSYCL_ENABLE_EXTENSION_JIT=OFF
-DSYCL_ENABLE_MAJOR_RELEASE_PREVIEW_LIB=OFF
-DCMAKE_INSTALL_RPATH=\$ORIGIN
-DPython3_ROOT_DIR=${LIBDIR}/python/
@@ -79,10 +82,20 @@ set(DPCPP_EXTRA_ARGS
-DPYTHON_EXECUTABLE=${PYTHON_BINARY}
-DLLDB_ENABLE_CURSES=OFF
-DLLVM_ENABLE_TERMINFO=OFF
-DLLVM_ENABLE_ZSTD=FORCE_ON
-DLLVM_USE_STATIC_ZSTD=ON
-Dzstd_INCLUDE_DIR=${LIBDIR}/zstd/include
)
if(WIN32)
list(APPEND DPCPP_EXTRA_ARGS -DPython3_FIND_REGISTRY=NEVER)
list(APPEND DPCPP_EXTRA_ARGS
-DPython3_FIND_REGISTRY=NEVER
-Dzstd_LIBRARY=${LIBDIR}/zstd/lib/zstd_static.lib
)
else()
list(APPEND DPCPP_EXTRA_ARGS
-Dzstd_LIBRARY=${LIBDIR}/zstd/lib/libzstd.a
)
endif()
ExternalProject_Add(external_dpcpp
@@ -111,7 +124,7 @@ ExternalProject_Add(external_dpcpp
${PATCH_DIR}/dpcpp.diff &&
${PATCH_CMD} -p 1 -d
${BUILD_DIR}/dpcpp/src/external_dpcpp <
${PATCH_DIR}/dpcpp_13328.diff
${PATCH_DIR}/dpcpp_15124.diff
INSTALL_DIR ${LIBDIR}/dpcpp
)
@@ -142,9 +155,6 @@ if(WIN32)
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/lld-link.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/wasm-ld.exe
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/pi_unified_runtime.dll
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ur_adapter_level_zero.dll
COMMAND ${CMAKE_COMMAND} -E rm -f ${HARVEST_TARGET}/dpcpp/bin/ur_loader.dll
DEPENDEES install
)
endif()
@@ -152,7 +162,6 @@ else()
harvest(external_dpcpp dpcpp/bin dpcpp/bin "*")
harvest(external_dpcpp dpcpp/include dpcpp/include "*")
harvest(external_dpcpp dpcpp/lib dpcpp/lib "libsycl*")
# avoid harvesting libpi_unified_runtime and libur_ as they're optional.
harvest(external_dpcpp dpcpp/lib dpcpp/lib "libpi_level_zero*")
harvest(external_dpcpp dpcpp/lib dpcpp/lib "libur*")
harvest(external_dpcpp dpcpp/lib/clang dpcpp/lib/clang "*")
endif()

View File

@@ -67,6 +67,9 @@ ExternalProject_Add(external_unifiedruntime
URL_HASH ${UNIFIED_RUNTIME_HASH_TYPE}=${UNIFIED_RUNTIME_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/unifiedruntime
PATCH_COMMAND ${PATCH_CMD} -p 1 -d
${BUILD_DIR}/unifiedruntime/src/external_unifiedruntime <
${PATCH_DIR}/unifiedruntime.diff
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .
@@ -77,6 +80,9 @@ ExternalProject_Add(external_unifiedmemoryframework
URL_HASH ${UNIFIED_MEMORY_FRAMEWORK_HASH_TYPE}=${UNIFIED_MEMORY_FRAMEWORK_HASH}
DOWNLOAD_DIR ${DOWNLOAD_DIR}
PREFIX ${BUILD_DIR}/unifiedmemoryframework
PATCH_COMMAND ${PATCH_CMD} -p 1 -d
${BUILD_DIR}/unifiedmemoryframework/src/external_unifiedmemoryframework <
${PATCH_DIR}/unifiedmemoryframework.diff
CONFIGURE_COMMAND echo .
BUILD_COMMAND echo .
INSTALL_COMMAND echo .

View File

@@ -1039,25 +1039,26 @@ set(OPENPGL_HOMEPAGE http://www.openpgl.org/)
set(OPENPGL_LICENSE SPDX:Apache-2.0)
set(OPENPGL_COPYRIGHT "Copyright 2020 Intel Corporation.")
set(LEVEL_ZERO_VERSION 1.16.1)
# Default version used by DPCPP: unified-runtime/cmake/FetchLevelZero.cmake
set(LEVEL_ZERO_VERSION 1.19.2)
set(LEVEL_ZERO_NAME "oneAPI Level Zero")
set(LEVEL_ZERO_URI https://codeload.github.com/oneapi-src/level-zero/tar.gz/refs/tags/v${LEVEL_ZERO_VERSION})
set(LEVEL_ZERO_HASH f341dd6355d8da6ee9c29031642b8e8e4259f91c13c72d318c81663af048817e)
set(LEVEL_ZERO_HASH b0bea0a09d1a68f68ecf8694e58a60e199fa5785f91c2fd59f026133bc1c4f28)
set(LEVEL_ZERO_HASH_TYPE SHA256)
set(LEVEL_ZERO_FILE level-zero-${LEVEL_ZERO_VERSION}.tar.gz)
set(LEVEL_ZERO_HOMEPAGE https://github.com/oneapi-src/level-zero)
set(LEVEL_ZERO_LICENSE SPDX:MIT)
set(LEVEL_ZERO_COPYRIGHT "Copyright (C) 2019-2021 Intel Corporation")
set(LEVEL_ZERO_COPYRIGHT "Copyright (C) 2019-2024 Intel Corporation")
set(DPCPP_VERSION d2817d6d317db1143bb227168e85c409d5ab7c82) # tip of sycl-rel_5_2_0 as of 2024.05.24
set(DPCPP_VERSION v6.0.0-rc1)
set(DPCPP_URI https://github.com/intel/llvm/archive/${DPCPP_VERSION}.tar.gz)
set(DPCPP_HASH 86cbff157b79e29a6ebb96ba79c96f64b4296c33fcd896f60a5579955fca5724)
set(DPCPP_HASH 65f508827f48d9a894cc8c6fbdc9b15760681558ee29f6ebfef608034db99ca1)
set(DPCPP_HASH_TYPE SHA256)
set(DPCPP_FILE DPCPP-${DPCPP_VERSION}.tar.gz)
set(DPCPP_NAME DPC++)
set(DPCPP_HOMEPAGE "https://github.com/intel/llvm#oneapi-dpc-compiler")
set(DPCPP_LICENSE SPDX:Apache-2.0)
set(DPCPP_COPYRIGHT "Copyright (C) 2021 Intel Corporation")
set(DPCPP_COPYRIGHT "Copyright (C) 2021-2024 Intel Corporation")
########################
### DPCPP DEPS BEGIN ###
@@ -1068,14 +1069,14 @@ set(DPCPP_COPYRIGHT "Copyright (C) 2021 Intel Corporation")
# will take care of building them, unpack is being done in dpcpp_deps.cmake
# Source llvm/lib/SYCLLowerIR/CMakeLists.txt
set(VCINTRINSICS_VERSION da892e1982b6c25b9a133f85b4ac97142d8a3def)
set(VCINTRINSICS_VERSION b2565a03eb3cac07f5e8000fde971f95dc782c75)
set(VCINTRINSICS_URI https://github.com/intel/vc-intrinsics/archive/${VCINTRINSICS_VERSION}.tar.gz)
set(VCINTRINSICS_HASH 06b85bd988059939770eb6e6e6194562d17c5f5a5df9947af18696b3b1fe92f3)
set(VCINTRINSICS_HASH 4dfccbb60c2a929a97745c7a4cff04cc3f54aca1590b2763ca7842be59b55f01)
set(VCINTRINSICS_HASH_TYPE SHA256)
set(VCINTRINSICS_FILE vc-intrinsics-${VCINTRINSICS_VERSION}.tar.gz)
set(VCINTRINSICS_HOMEPAGE https://github.com/intel/vc-intrinsics)
set(VCINTRINSICS_LICENSE SPDX:MIT)
set(VCINTRINSICS_COPYRIGHT "Copyright (c) 2019 Intel Corporation")
set(VCINTRINSICS_COPYRIGHT "Copyright (c) 2019-2024 Intel Corporation")
# Source opencl/CMakeLists.txt
set(OPENCLHEADERS_VERSION 9ddb236e6eb3cf844f9e2f81677e1045f9bf838e)
@@ -1101,9 +1102,9 @@ set(ICDLOADER_COPYRIGHT " Copyright (c) 2020 The Khronos Group Inc.")
# Source sycl/cmake/modules/AddBoostMp11Headers.cmake
# Using external MP11 here, getting AddBoostMp11Headers.cmake to recognize
# our copy in boost directly was more trouble than it was worth.
set(MP11_VERSION ef7608b463298b881bc82eae4f45a4385ed74fca)
set(MP11_VERSION 863d8b8d2b20f2acd0b5870f23e553df9ce90e6c)
set(MP11_URI https://github.com/boostorg/mp11/archive/${MP11_VERSION}.tar.gz)
set(MP11_HASH ec2d68858dd4d04f9a1e3960fc94a58440715e1b3e746cc495438116715343e2)
set(MP11_HASH 525692267abb8086bb9cc2fe81fb96d73ac645dfa6825cb5114686aafe244e9f)
set(MP11_HASH_TYPE SHA256)
set(MP11_FILE mp11-${MP11_VERSION}.tar.gz)
set(MP11_HOMEPAGE https://github.com/boostorg/mp11)
@@ -1120,10 +1121,10 @@ set(SPIRV_HEADERS_HOMEPAGE https://github.com/KhronosGroup/SPIRV-Headers)
set(SPIRV_HEADERS_LICENSE SPDX:MIT-Khronos-old)
set(SPIRV_HEADERS_COPYRIGHT "Copyright (c) 2015-2024 The Khronos Group Inc.")
# Source sycl/plugins/unified_runtime/CMakeLists.txt
set(UNIFIED_RUNTIME_VERSION ec634ff05b067d7922ec45059dda94665e5dcd9b)
# Source sycl/cmake/modules/FetchUnifiedRuntime.cmake
set(UNIFIED_RUNTIME_VERSION 04db12683146673af9a09e923c19cf9a4ee96982)
set(UNIFIED_RUNTIME_URI https://github.com/oneapi-src/unified-runtime/archive/${UNIFIED_RUNTIME_VERSION}.tar.gz)
set(UNIFIED_RUNTIME_HASH ff15574aba6225d0c8a32f71866126551dee1aaacfa7894b8fdcc5e52e0f5da9)
set(UNIFIED_RUNTIME_HASH 1ebb6f6ec640dac6279ad84a705ddb48da12e29af9942a7e8fc087f23212f650)
set(UNIFIED_RUNTIME_HASH_TYPE SHA256)
set(UNIFIED_RUNTIME_FILE unified-runtime-${UNIFIED_RUNTIME_VERSION}.tar.gz)
set(UNIFIED_RUNTIME_HOMEPAGE https://github.com/oneapi-src/unified-runtime)
@@ -1131,9 +1132,9 @@ set(UNIFIED_RUNTIME_LICENSE SPDX:Apache-2.0 WITH LLVM-exception)
set(UNIFIED_RUNTIME_COPYRIGHT "Copyright (C) 2019-2024 Intel Corporation")
# Source unified-runtime/source/common/CMakeList.txt
set(UNIFIED_MEMORY_FRAMEWORK_VERSION 9bf7a0dc4dff76844e10edbb5c6e9d917536ef6d)
set(UNIFIED_MEMORY_FRAMEWORK_VERSION v0.9.0)
set(UNIFIED_MEMORY_FRAMEWORK_URI https://github.com/oneapi-src/unified-memory-framework/archive/${UNIFIED_MEMORY_FRAMEWORK_VERSION}.tar.gz)
set(UNIFIED_MEMORY_FRAMEWORK_HASH 7ff7d0be7be6e59693d238eab02b5a9741c820d3d995446781dcd7a2adaa28e9)
set(UNIFIED_MEMORY_FRAMEWORK_HASH 8594738d84abb4001bb0e962383b8a2604837e7bbc378d0771ecdab436c7d001)
set(UNIFIED_MEMORY_FRAMEWORK_HASH_TYPE SHA256)
set(UNIFIED_MEMORY_FRAMEWORK_FILE unified-memory-framework-${UNIFIED_MEMORY_FRAMEWORK_VERSION}.tar.gz)
set(UNIFIED_MEMORY_FRAMEWORK_HOMEPAGE https://github.com/oneapi-src/unified-memory-framework)

View File

@@ -1,44 +1,46 @@
diff -Naur llvm-sycl-nightly-20220501.orig\opencl/CMakeLists.txt llvm-sycl-nightly-20220501\opencl/CMakeLists.txt
--- llvm-sycl-nightly-20220501.orig/opencl/CMakeLists.txt 2022-04-29 13:47:11 -0600
+++ llvm-sycl-nightly-20220501/opencl/CMakeLists.txt 2022-05-21 15:25:06 -0600
@@ -11,6 +11,11 @@
)
endif()
+# Blender code below is determined to use FetchContent_Declare
+# temporarily allow it (but feed it our downloaded tarball
+# in the OpenCL_HEADERS variable
+set(FETCHCONTENT_FULLY_DISCONNECTED OFF)
+
# Repo URLs
set(OCL_HEADERS_REPO
@@ -77,5 +82,6 @@
FetchContent_MakeAvailable(ocl-icd)
add_library(OpenCL-ICD ALIAS OpenCL)
+set(FETCHCONTENT_FULLY_DISCONNECTED ON)
add_subdirectory(opencl-aot)
diff -Naur llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake
--- llvm-sycl-nightly-20220208.orig/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-02-08 09:17:24 -0700
+++ llvm-sycl-nightly-20220208/libdevice/cmake/modules/SYCLLibdevice.cmake 2022-05-24 11:35:51 -0600
@@ -36,7 +36,9 @@
add_custom_target(libsycldevice-obj)
diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake
index c1aac6d017ef..99d84d89b9cc 100644
--- a/libdevice/cmake/modules/SYCLLibdevice.cmake
+++ b/libdevice/cmake/modules/SYCLLibdevice.cmake
@@ -65,7 +65,9 @@ add_custom_target(libsycldevice-obj-new-offload)
add_custom_target(libsycldevice-spv)
add_custom_target(libsycldevice-bc)
-add_custom_target(libsycldevice DEPENDS
+# Blender: add ALL here otherwise this target will not build
+# and cause an error due to missing files during the install phase.
+add_custom_target(libsycldevice ALL DEPENDS
libsycldevice-obj
libsycldevice-spv)
libsycldevice-bc
libsycldevice-obj-new-offload
diff --git a/opencl/CMakeLists.txt b/opencl/CMakeLists.txt
index 6f618033a203..38a7d4ddec22 100644
--- a/opencl/CMakeLists.txt
+++ b/opencl/CMakeLists.txt
@@ -11,6 +11,11 @@ if (MSVC)
)
endif()
+# Blender code below is determined to use FetchContent_Declare
+# temporarily allow it (but feed it our downloaded tarball
+# in the OpenCL_HEADERS variable
+set(FETCHCONTENT_FULLY_DISCONNECTED OFF)
+
# Repo URLs
set(OCL_HEADERS_REPO
@@ -77,5 +82,6 @@ endif()
FetchContent_MakeAvailable(ocl-icd)
add_library(OpenCL-ICD ALIAS OpenCL)
+set(FETCHCONTENT_FULLY_DISCONNECTED ON)
add_subdirectory(opencl-aot)
diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt
index 00ce045f43c3..e044262e628e 100644
index 6ee321b7bff9..a77b94ede51f 100644
--- a/sycl/CMakeLists.txt
+++ b/sycl/CMakeLists.txt
@@ -188,7 +188,6 @@ install(FILES
@@ -203,7 +203,6 @@ install(FILES
COMPONENT sycl-headers)
include(AddBoostMp11Headers)
@@ -48,10 +50,10 @@ index 00ce045f43c3..e044262e628e 100644
# are not detected by copy_directory command.
diff --git a/sycl/cmake/modules/FetchBoostUnorderedHeaders.cmake b/sycl/cmake/modules/FetchBoostUnorderedHeaders.cmake
deleted file mode 100644
index a0f446055026..000000000000
index 5cabc6cccdbf..000000000000
--- a/sycl/cmake/modules/FetchBoostUnorderedHeaders.cmake
+++ /dev/null
@@ -1,129 +0,0 @@
@@ -1,101 +0,0 @@
-# Fetches the unordered boost module and its dependencies
-function(add_boost_module_headers)
- cmake_parse_arguments(
@@ -81,124 +83,126 @@ index a0f446055026..000000000000
- set(BOOST_UNORDERED_INCLUDE_DIRS ${BOOST_UNORDERED_INCLUDE_DIRS} "${BOOST_MODULE_SRC_DIR}/include" PARENT_SCOPE)
-endfunction(add_boost_module_headers)
-
-set(BOOST_UNORDERED_GIT_TAG bd24dfd284dbc70e7521915af0d8d049f74a1e85)
-# Author: joaquintides <joaquin@tid.es>
-# Date: Tue Jul 18 18:19:13 2023 +0200
-set(BOOST_UNORDERED_GIT_TAG 5e6b9291deb55567d41416af1e77c2516dc1250f)
-# Merge: 15cfef69 ccf9a76e
-# Author: joaquintides <joaquin.lopezmunoz@gmail.com>
-# Date: Sat Mar 16 09:18:41 2024 +0100
-#
-# updated concurrent map benchmark plots
-# Merge pull request #238 from boostorg/fix/gh-237
-add_boost_module_headers(NAME "unordered" SRC_DIR ${BOOST_UNORDERED_SOURCE_DIR} GIT_TAG ${BOOST_UNORDERED_GIT_TAG})
-
-set(BOOST_ASSERT_GIT_TAG 02256c84fd0cd58a139d9dc1b25b5019ca976ada)
-set(BOOST_ASSERT_GIT_TAG 447e0b3a331930f8708ade0e42683d12de9dfbc3)
-# Author: Peter Dimov <pdimov@gmail.com>
-# Date: Thu Jun 22 18:11:58 2023 +0300
-# Date: Sat Feb 3 20:43:55 2024 +0200
-#
-# Do not use std::source_location::current under nvcc. Fixes #32.
-# Use __builtin_FUNCSIG() under MSVC 19.35+. Fixes #35.
-add_boost_module_headers(NAME "assert" SRC_DIR ${BOOST_ASSERT_SOURCE_DIR} GIT_TAG ${BOOST_ASSERT_GIT_TAG})
-
-set(BOOST_CONFIG_GIT_TAG a1cf5d531405e62927b0257b5cbecc66a545b508)
-# Merge: f5726a26 a1edcd56
-set(BOOST_CONFIG_GIT_TAG 11385ec21012926e15a612e3bf9f9a71403c1e5b)
-# Merge: eef05e98 601598f8
-# Author: jzmaddock <john@johnmaddock.co.uk>
-# Date: Sat Apr 15 13:20:12 2023 +0100
-# Date: Sun Feb 4 09:46:22 2024 +0000
-#
-# Merge pull request #475 from boostorg/ci_2023_04
-# Merge branch 'develop'
-add_boost_module_headers(NAME "config" SRC_DIR ${BOOST_CONFIG_SOURCE_DIR} GIT_TAG ${BOOST_CONFIG_GIT_TAG})
-
-set(BOOST_CONTAINER_HASH_GIT_TAG 226eb066e949adbf37b220e993d64ecefeeaae99)
-set(BOOST_CONTAINER_HASH_GIT_TAG 6d214eb776456bf17fbee20780a034a23438084f)
-# Author: Peter Dimov <pdimov@gmail.com>
-# Date: Thu Jun 29 14:38:53 2023 +0300
-# Date: Wed Mar 6 05:13:53 2024 +0200
-#
-# Update .drone.jsonnet
-# Update .appveyor.yml
-add_boost_module_headers(NAME "container_hash" SRC_DIR ${BOOST_CONTAINER_HASH_SOURCE_DIR} GIT_TAG ${BOOST_CONTAINER_HASH_GIT_TAG})
-
-set(BOOST_CORE_GIT_TAG 216999e552e7f73e63c7bcc88b8ce9c179bbdbe2)
-# Author: Peter Dimov <pdimov@gmail.com>
-# Date: Sun Jun 25 13:46:53 2023 +0300
-set(BOOST_CORE_GIT_TAG 083b41c17e34f1fc9b43ab796b40d0d8bece685c)
-# Merge: 8cc2fda a973490
-# Author: Andrey Semashev <Lastique@users.noreply.github.com>
-# Date: Tue Mar 19 18:10:04 2024 +0300
-#
-# Avoid -Wsign-conversion warning in checked_delete.hpp
-# Merge pull request #169 from k3DW/feature/168
-add_boost_module_headers(NAME "core" SRC_DIR ${BOOST_CORE_SOURCE_DIR} GIT_TAG ${BOOST_CORE_GIT_TAG})
-
-# Describe is a dependency of container_hash
-set(BOOST_DESCRIBE_GIT_TAG a0eafb08100eb15a57b6dae6d270c0012a56aa21)
-# Merge: 1692c3e b54fda5
-set(BOOST_DESCRIBE_GIT_TAG 50719b212349f3d1268285c586331584d3dbfeb5)
-# Author: Peter Dimov <pdimov@gmail.com>
-# Date: Sun May 21 04:51:35 2023 +0300
-# Date: Sat Mar 23 20:27:08 2024 +0200
-#
-# Merge branch 'fix-deprecated-inline-static-variables' of https://github.com/Romain-Geissler-1A/describe into feature/pr-40
-# Update .drone.jsonnet
-add_boost_module_headers(NAME "describe" SRC_DIR ${BOOST_DESCRIBE_SOURCE_DIR} GIT_TAG ${BOOST_DESCRIBE_GIT_TAG})
-
-set(BOOST_MOVE_GIT_TAG f1fbb45134065deebe95249c616a967d4b66c809)
-# Author: Ion Gaztañaga <igaztanaga@gmail.com>
-# Date: Mon Mar 13 13:32:29 2023 +0100
-#
-# Use [[msvc::intrinsic] attribute if available in move/forward in order to improve debug experience
-add_boost_module_headers(NAME "move" SRC_DIR ${BOOST_MOVE_SOURCE_DIR} GIT_TAG ${BOOST_MOVE_GIT_TAG})
-
-# Reuse mp11 fetched earlier for DPC++ headers
-set(BOOST_UNORDERED_INCLUDE_DIRS ${BOOST_UNORDERED_INCLUDE_DIRS} "${BOOST_MP11_SOURCE_DIR}/include/")
-
-set(BOOST_PREDEF_GIT_TAG 392e4e767469e3469c9390f0d9cca16724dc3fc8)
-# Merge: a12c7fd 499d28e
-set(BOOST_PREDEF_GIT_TAG 0fdfb49c3a6789e50169a44e88a07cc889001106)
-# Merge: 392e4e7 614546d
-# Author: Rene Rivera <grafikrobot@gmail.com>
-# Date: Sun Feb 27 14:44:35 2022 -0600
-#
-# Release 1.14.
-add_boost_module_headers(NAME "predef" SRC_DIR ${BOOST_PREDEF_SOURCE_DIR} GIT_TAG ${BOOST_PREDEF_GIT_TAG})
-
-set(BOOST_PREPROCESSOR_GIT_TAG 667e87b3392db338a919cbe0213979713aca52e3)
-# Author: Peter Dimov <pdimov@gmail.com>
-# Date: Tue Aug 16 20:59:52 2022 +0300
-#
-# Change C test names to not conflict with the C++ ones
-add_boost_module_headers(NAME "preprocessor" SRC_DIR ${BOOST_PREPROCESSOR_SOURCE_DIR} GIT_TAG ${BOOST_PREPROCESSOR_GIT_TAG})
-
-set(BOOST_STATIC_ASSERT_GIT_TAG 45eec41c293bc5cd36ec3ed83671f70bc1aadc9f)
-# Merge: ba72d33 a1abfec
-# Author: jzmaddock <john@johnmaddock.co.uk>
-# Date: Tue Mar 8 09:35:50 2022 +0000
-#
-# Merge pull request #15 from sdarwin/githubactions
-add_boost_module_headers(NAME "static_assert" SRC_DIR ${BOOST_STATIC_ASSERT_SOURCE_DIR} GIT_TAG ${BOOST_STATIC_ASSERT_GIT_TAG})
-
-set(BOOST_THROW_EXCEPTION_GIT_TAG 23dd41e920ecd91237500ac6428f7d392a7a875c)
-# Author: Peter Dimov <pdimov@gmail.com>
-# Date: Sun Jun 25 16:12:57 2023 +0300
-#
-# Update ci.yml
-add_boost_module_headers(NAME "throw_exception" SRC_DIR ${BOOST_THROW_EXCEPTION_SOURCE_DIR} GIT_TAG ${BOOST_THROW_EXCEPTION_GIT_TAG})
-
-set(BOOST_TUPLE_GIT_TAG 500e4fa0a2845b96c0dd919e7485e0f216438a01)
-# Merge: aa16ae3 ded3c1d
-# Author: Joel de Guzman <djowel@gmail.com>
-# Date: Thu Dec 30 23:20:18 2021 +0800
-#
-# Merge pull request #21 from igaztanaga/patch-1
-add_boost_module_headers(NAME "tuple" SRC_DIR ${BOOST_TUPLE_SOURCE_DIR} GIT_TAG ${BOOST_TUPLE_GIT_TAG})
-
-set(BOOST_TYPE_TRAITS_GIT_TAG 89f5011b4a79d91e42735670e39f72cb25c86c72)
-# Merge: 55feb75 1ebd31e
-# Author: John Maddock <john@johnmaddock.co.uk>
-# Date: Fri Feb 24 18:02:30 2023 +0000
-# Date: Tue Oct 31 20:24:41 2023 -0500
-#
-# Merge branch 'develop'
-add_boost_module_headers(NAME "type_traits" SRC_DIR ${BOOST_TYPE_TRAITS_SOURCE_DIR} GIT_TAG ${BOOST_TYPE_TRAITS_GIT_TAG})
diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt
index ead8f2c83ab7..6fb8305a1a88 100644
--- a/sycl/source/CMakeLists.txt
+++ b/sycl/source/CMakeLists.txt
@@ -69,8 +69,6 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME)
target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB})
-add_boost_module_headers(NAME "predef" SRC_DIR ${BOOST_PREDEF_SOURCE_DIR} GIT_TAG ${BOOST_PREDEF_GIT_TAG})
-
-# Static assert is a dependency of core
-set(BOOST_STATIC_ASSERT_GIT_TAG ba72d3340f3dc6e773868107f35902292f84b07e)
-# Merge: 392e4e7 614546d
-# Author: Rene Rivera <grafikrobot@gmail.com>
-# Date: Tue Oct 31 20:24:41 2023 -0500
-#
-# Merge branch 'develop'
-add_boost_module_headers(NAME "static_assert" SRC_DIR ${BOOST_STATIC_ASSERT_SOURCE_DIR} GIT_TAG ${BOOST_STATIC_ASSERT_GIT_TAG})
-
-set(BOOST_THROW_EXCEPTION_GIT_TAG 7c8ec2114bc1f9ab2a8afbd629b96fbdd5901294)
-# Author: Peter Dimov <pdimov@gmail.com>
-# Date: Sat Jan 6 19:41:56 2024 +0200
-#
-# Add -Wundef to test/Jamfile
-add_boost_module_headers(NAME "throw_exception" SRC_DIR ${BOOST_THROW_EXCEPTION_SOURCE_DIR} GIT_TAG ${BOOST_THROW_EXCEPTION_GIT_TAG})
diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
index 41268945b078..895065b8f1a7 100644
--- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake
+++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake
@@ -128,11 +128,11 @@ if(SYCL_PI_UR_USE_FETCH_CONTENT)
# to link statically on windows
if(WIN32)
set(UMF_BUILD_SHARED_LIBRARY OFF CACHE INTERNAL "Build UMF shared library")
- set(UMF_LINK_HWLOC_STATICALLY ON CACHE INTERNAL "static HWLOC")
- else()
- set(UMF_DISABLE_HWLOC ${SYCL_UMF_DISABLE_HWLOC} CACHE INTERNAL "Disable hwloc for UMF")
endif()
- target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS})
-
# pi_win_proxy_loader
if (WIN32)
include_directories(${LLVM_EXTERNAL_SYCL_SOURCE_DIR}/pi_win_proxy_loader)
+ set(UMF_LINK_HWLOC_STATICALLY OFF CACHE INTERNAL "static HWLOC")
+ set(UMF_DISABLE_HWLOC ${SYCL_UMF_DISABLE_HWLOC} CACHE INTERNAL "Disable hwloc for UMF")
+
fetch_adapter_source(level_zero
${UNIFIED_RUNTIME_REPO}
${UNIFIED_RUNTIME_TAG}
diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp
index 454a43fe9953..50fa14fbbd5d 100644
--- a/sycl/source/detail/graph_impl.hpp
+++ b/sycl/source/detail/graph_impl.hpp
@@ -28,6 +28,7 @@
#include <list>
#include <set>
#include <shared_mutex>
+#include <sstream>
namespace sycl {
inline namespace _V1 {
diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp
index 2a128ba9a901..db8faf6e188f 100644
--- a/sycl/source/detail/kernel_bundle_impl.hpp
+++ b/sycl/source/detail/kernel_bundle_impl.hpp
@@ -24,6 +24,7 @@
#include <cstdint>
#include <cstring>
#include <memory>
+#include <sstream>
#include <vector>
#include "split_string.hpp"
diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp
index 87a41d9fe105..c0a572b4d144 100644
index f170b55a6348..9f593359c7f3 100644
--- a/sycl/source/detail/kernel_program_cache.hpp
+++ b/sycl/source/detail/kernel_program_cache.hpp
@@ -18,12 +18,10 @@
@@ -19,12 +19,10 @@
#include <atomic>
#include <condition_variable>
@@ -212,8 +216,8 @@ index 87a41d9fe105..c0a572b4d144 100644
// For testing purposes
class MockKernelProgramCache;
@@ -113,8 +111,8 @@ public:
std::pair<std::uintptr_t, sycl::detail::pi::PiDevice>;
@@ -123,8 +121,8 @@ public:
using CommonProgramKeyT = std::pair<std::uintptr_t, ur_device_handle_t>;
struct ProgramCache {
- ::boost::unordered_map<ProgramCacheKeyT, ProgramBuildResultPtr> Cache;
@@ -223,23 +227,22 @@ index 87a41d9fe105..c0a572b4d144 100644
size_t size() const noexcept { return Cache.size(); }
};
@@ -138,10 +136,8 @@ public:
@@ -152,23 +150,15 @@ public:
};
using KernelBuildResultPtr = std::shared_ptr<KernelBuildResult>;
- using KernelByNameT =
- ::boost::unordered_map<std::string, KernelBuildResultPtr>;
- using KernelCacheT =
- ::boost::unordered_map<sycl::detail::pi::PiProgram, KernelByNameT>;
- ::boost::unordered_map<ur_program_handle_t, KernelByNameT>;
+ using KernelByNameT = std::map<std::string, KernelBuildResultPtr>;
+ using KernelCacheT = std::map<sycl::detail::pi::PiProgram, KernelByNameT>;
+ using KernelCacheT = std::map<ur_program_handle_t, KernelByNameT>;
using KernelFastCacheKeyT =
std::tuple<SerializedObj, sycl::detail::pi::PiDevice, std::string,
@@ -149,13 +145,7 @@ public:
std::tuple<SerializedObj, ur_device_handle_t, std::string, std::string>;
using KernelFastCacheValT =
std::tuple<sycl::detail::pi::PiKernel, std::mutex *,
const KernelArgMask *, sycl::detail::pi::PiProgram>;
std::tuple<ur_kernel_handle_t, std::mutex *, const KernelArgMask *,
ur_program_handle_t>;
- // This container is used as a fast path for retrieving cached kernels.
- // unordered_flat_map is used here to reduce lookup overhead.
- // The slow path is used only once for each newly created kernel, so the
@@ -252,7 +255,7 @@ index 87a41d9fe105..c0a572b4d144 100644
~KernelProgramCache() = default;
diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt
index 71d2413c2974..0c55c870c4b8 100644
index ec740f913ed4..c0e04bfaf119 100644
--- a/sycl/unittests/CMakeLists.txt
+++ b/sycl/unittests/CMakeLists.txt
@@ -1,6 +1,5 @@
@@ -262,6 +265,3 @@ index 71d2413c2974..0c55c870c4b8 100644
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
--
2.30.1.windows.1

View File

@@ -1,12 +0,0 @@
diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt
index bfeb4a763da84..94400f08154a2 100644
--- a/clang/lib/Driver/CMakeLists.txt
+++ b/clang/lib/Driver/CMakeLists.txt
@@ -98,6 +98,7 @@ add_clang_library(clangDriver
DEPENDS
ClangDriverOptions
+ DeviceConfigFile
LINK_LIBS
clangBasic

View File

@@ -0,0 +1,1036 @@
diff --git a/buildbot/configure.py b/buildbot/configure.py
index 692a64fd3125..424df77c2513 100644
--- a/buildbot/configure.py
+++ b/buildbot/configure.py
@@ -178,6 +178,8 @@ def do_configure(args):
"-DLLVM_ENABLE_PROJECTS={}".format(llvm_enable_projects),
"-DSYCL_BUILD_PI_HIP_PLATFORM={}".format(sycl_build_pi_hip_platform),
"-DLLVM_BUILD_TOOLS=ON",
+ "-DLLVM_ENABLE_ZSTD=ON",
+ "-DLLVM_USE_STATIC_ZSTD=ON",
"-DSYCL_ENABLE_WERROR={}".format(sycl_werror),
"-DCMAKE_INSTALL_PREFIX={}".format(install_dir),
"-DSYCL_INCLUDE_TESTS=ON", # Explicitly include all kinds of SYCL tests.
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 17dd75a265a8..8472efa23fdf 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -10043,6 +10043,19 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
SmallString<128> TargetTripleOpt = TT.getArchName();
bool WrapFPGADevice = false;
bool FPGAEarly = false;
+
+ // Validate and propogate CLI options related to device image compression.
+ // -offload-compress
+ if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) {
+ WrapperArgs.push_back(
+ C.getArgs().MakeArgString(Twine("-offload-compress")));
+ // -offload-compression-level=<>
+ if (Arg *A = C.getInputArgs().getLastArg(
+ options::OPT_offload_compression_level_EQ))
+ WrapperArgs.push_back(C.getArgs().MakeArgString(
+ Twine("-offload-compression-level=") + A->getValue()));
+ }
+
if (Arg *A = C.getInputArgs().getLastArg(options::OPT_fsycl_link_EQ)) {
WrapFPGADevice = true;
FPGAEarly = (A->getValue() == StringRef("early"));
diff --git a/clang/test/Driver/clang-offload-wrapper-zstd.c b/clang/test/Driver/clang-offload-wrapper-zstd.c
new file mode 100644
index 000000000000..bc5fadfc4cf4
--- /dev/null
+++ b/clang/test/Driver/clang-offload-wrapper-zstd.c
@@ -0,0 +1,40 @@
+// REQUIRES: zstd && (system-windows || system-linux)
+
+// clang-offload-wrapper compression test: checks that the wrapper can compress the device images.
+// Checks the '--offload-compress', '--offload-compression-level', and '--offload-compression-threshold'
+// CLI options.
+
+// --- Prepare test data by creating the debice binary image.
+// RUN: echo -e -n 'device binary image1\n' > %t.bin
+// RUN: echo -e -n '[Category1]\nint_prop1=1|10\n[Category2]\nint_prop2=1|20\n' > %t.props
+// RUN: echo -e -n 'kernel1\nkernel2\n' > %t.sym
+// RUN: echo -e -n 'Manifest file - arbitrary data generated by the toolchain\n' > %t.mnf
+// RUN: echo '[Code|Properties|Symbols|Manifest]' > %t.img1
+// RUN: echo %t.bin"|"%t.props"|"%t.sym"|"%t.mnf >> %t.img1
+
+///////////////////////////////////////////////////////
+// Compress the test image using clang-offload-wrapper.
+///////////////////////////////////////////////////////
+
+// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \
+// RUN: --offload-compress --offload-compression-level=9 --offload-compression-threshold=0 \
+// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS
+
+// CHECK-COMPRESS: [Compression] Original image size:
+// CHECK-COMPRESS: [Compression] Compressed image size:
+// CHECK-COMPRESS: [Compression] Compression level used: 9
+
+///////////////////////////////////////////////////////////
+// Check that there is no compression when the threshold is set to a value higher than the image size
+// or '--offload-compress' is not set.
+///////////////////////////////////////////////////////////
+
+// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \
+// RUN: --offload-compress --offload-compression-level=3 --offload-compression-threshold=1000 \
+// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS
+
+// RUN: clang-offload-wrapper -kind=sycl -target=TARGET -batch %t.img1 -o %t.wrapped.bc -v \
+// RUN: --offload-compression-level=3 --offload-compression-threshold=0 \
+// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS
+
+// CHECK-NO-COMPRESS-NOT: [Compression] Original image size:
diff --git a/clang/test/Driver/sycl-offload-wrapper-compression.cpp b/clang/test/Driver/sycl-offload-wrapper-compression.cpp
new file mode 100644
index 000000000000..1ef9282ee359
--- /dev/null
+++ b/clang/test/Driver/sycl-offload-wrapper-compression.cpp
@@ -0,0 +1,14 @@
+///
+/// Check if '--offload-compress' and '--offload-compression-level' CLI
+/// options are passed to the clang-offload-wrapper.
+///
+
+// RUN: %clangxx -### -fsycl --offload-compress --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-COMPRESS
+// CHECK-COMPRESS: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}}"-offload-compression-level=3"{{.*}}
+
+// Make sure that the compression options are not passed when --offload-compress is not set.
+// RUN: %clangxx -### -fsycl %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS
+// RUN: %clangxx -### -fsycl --offload-compression-level=3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-COMPRESS
+
+// CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compress"{{.*}}
+// CHECK-NO-COMPRESS-NOT: {{.*}}clang-offload-wrapper{{.*}}"-offload-compression-level=3"{{.*}}
diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt
index 9cb5ec66c644..3195f18fe23c 100644
--- a/clang/tools/clang-offload-wrapper/CMakeLists.txt
+++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt
@@ -10,6 +10,7 @@ add_clang_tool(clang-offload-wrapper
set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS
clangBasic
+ LLVMSupport
)
add_dependencies(clang clang-offload-wrapper)
diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
index 5facbb4329ab..6ce4c5dc36ab 100644
--- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
+++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
@@ -67,6 +67,9 @@
#include <string>
#include <tuple>
+// For device image compression.
+#include <llvm/Support/Compression.h>
+
#define OPENMP_OFFLOAD_IMAGE_VERSION "1.0"
using namespace llvm;
@@ -139,6 +142,25 @@ static cl::list<std::string> Inputs(cl::Positional, cl::OneOrMore,
cl::desc("<input files>"),
cl::cat(ClangOffloadWrapperCategory));
+// CLI options for device image compression.
+static cl::opt<bool> OffloadCompressDevImgs(
+ "offload-compress", cl::init(false), cl::Optional,
+ cl::desc("Enable device image compression using ZSTD."),
+ cl::cat(ClangOffloadWrapperCategory));
+
+static cl::opt<int>
+ OffloadCompressLevel("offload-compression-level", cl::init(10),
+ cl::Optional,
+ cl::desc("ZSTD Compression level. Default: 10"),
+ cl::cat(ClangOffloadWrapperCategory));
+
+static cl::opt<int>
+ OffloadCompressThreshold("offload-compression-threshold", cl::init(512),
+ cl::Optional,
+ cl::desc("Threshold (in bytes) over which to "
+ "compress images. Default: 512"),
+ cl::cat(ClangOffloadWrapperCategory));
+
// Binary image formats supported by this tool. The support basically means
// mapping string representation given at the command line to a value from this
// enum. No format checking is performed.
@@ -146,8 +168,9 @@ enum BinaryImageFormat {
none, // image kind is not determined
native, // image kind is native
// portable image kinds go next
- spirv, // SPIR-V
- llvmbc // LLVM bitcode
+ spirv, // SPIR-V
+ llvmbc, // LLVM bitcode
+ compressed_none // compressed image with unknown format
};
/// Sets offload kind.
@@ -265,6 +288,8 @@ static StringRef formatToString(BinaryImageFormat Fmt) {
return "llvmbc";
case BinaryImageFormat::native:
return "native";
+ case BinaryImageFormat::compressed_none:
+ return "compressed_none";
}
llvm_unreachable("bad format");
@@ -1083,10 +1108,66 @@ private:
return FBinOrErr.takeError();
Fbin = *FBinOrErr;
} else {
- Fbin = addDeviceImageToModule(
- ArrayRef<char>(Bin->getBufferStart(), Bin->getBufferSize()),
- Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind,
- Img.Tgt);
+
+ // If '--offload-compress' option is specified and zstd is not
+ // available, throw an error.
+ if (OffloadCompressDevImgs && !llvm::compression::zstd::isAvailable()) {
+ return createStringError(
+ inconvertibleErrorCode(),
+ "'--offload-compress' option is specified but zstd "
+ "is not available. The device image will not be "
+ "compressed.");
+ }
+
+ // Don't compress if the user explicitly specifies the binary image
+ // format or if the image is smaller than OffloadCompressThreshold
+ // bytes.
+ if (Kind != OffloadKind::SYCL || !OffloadCompressDevImgs ||
+ Img.Fmt != BinaryImageFormat::none ||
+ !llvm::compression::zstd::isAvailable() ||
+ static_cast<int>(Bin->getBufferSize()) < OffloadCompressThreshold) {
+ Fbin = addDeviceImageToModule(
+ ArrayRef<char>(Bin->getBufferStart(), Bin->getBufferSize()),
+ Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind,
+ Img.Tgt);
+ } else {
+
+ // Compress the image using zstd.
+ SmallVector<uint8_t, 512> CompressedBuffer;
+#if LLVM_ENABLE_EXCEPTIONS
+ try {
+#endif
+ llvm::compression::zstd::compress(
+ ArrayRef<unsigned char>(
+ (const unsigned char *)(Bin->getBufferStart()),
+ Bin->getBufferSize()),
+ CompressedBuffer, OffloadCompressLevel);
+#if LLVM_ENABLE_EXCEPTIONS
+ } catch (const std::exception &ex) {
+ return createStringError(inconvertibleErrorCode(),
+ std::string("Failed to compress the device image: \n") +
+ std::string(ex.what()));
+ }
+#endif
+ if (Verbose)
+ errs() << "[Compression] Original image size: "
+ << Bin->getBufferSize() << "\n"
+ << "[Compression] Compressed image size: "
+ << CompressedBuffer.size() << "\n"
+ << "[Compression] Compression level used: "
+ << OffloadCompressLevel << "\n";
+
+ // Add the compressed image to the module.
+ Fbin = addDeviceImageToModule(
+ ArrayRef<char>((const char *)CompressedBuffer.data(),
+ CompressedBuffer.size()),
+ Twine(OffloadKindTag) + Twine(ImgId) + Twine(".data"), Kind,
+ Img.Tgt);
+
+ // Change image format to compressed_none.
+ Ffmt = ConstantInt::get(Type::getInt8Ty(C),
+ BinaryImageFormat::compressed_none);
+ }
}
if (Kind == OffloadKind::SYCL) {
diff --git a/sycl/doc/UsersManual.md b/sycl/doc/UsersManual.md
index 6a9e12882518..3f184edc12de 100644
--- a/sycl/doc/UsersManual.md
+++ b/sycl/doc/UsersManual.md
@@ -195,6 +195,19 @@ and not recommended to use in production environment.
which may or may not perform additional inlining.
Default value is 225.
+**`--offload-compress`**
+
+ Enables device image compression for SYCL offloading. Device images
+ are compressed using `zstd` compression algorithm and only if their size
+ exceeds 512 bytes.
+ Default value is false.
+
+**`--offload-compression-level=<int>`**
+
+ `zstd` compression level used to compress device images when `--offload-
+ compress` is enabled.
+ The default value is 10.
+
## Target toolchain options
**`-Xsycl-target-backend=<T> "options"`**
diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt
index f0067a45b20a..be683124f7f9 100644
--- a/sycl/source/CMakeLists.txt
+++ b/sycl/source/CMakeLists.txt
@@ -69,6 +69,13 @@ function(add_sycl_rt_library LIB_NAME LIB_OBJ_NAME)
target_link_libraries(${LIB_NAME} PRIVATE ${ARG_XPTI_LIB})
endif()
+ if (NOT LLVM_ENABLE_ZSTD)
+ target_compile_definitions(${LIB_OBJ_NAME} PRIVATE SYCL_RT_ZSTD_NOT_AVAIABLE)
+ else()
+ target_link_libraries(${LIB_NAME} PRIVATE ${zstd_STATIC_LIBRARY})
+ target_include_directories(${LIB_OBJ_NAME} PRIVATE ${zstd_INCLUDE_DIR})
+ endif()
+
target_include_directories(${LIB_OBJ_NAME} PRIVATE ${BOOST_UNORDERED_INCLUDE_DIRS})
# ur_win_proxy_loader
diff --git a/sycl/source/detail/compiler.hpp b/sycl/source/detail/compiler.hpp
index 9d2777e863ee..827ee61ef811 100644
--- a/sycl/source/detail/compiler.hpp
+++ b/sycl/source/detail/compiler.hpp
@@ -115,7 +115,8 @@ enum sycl_device_binary_type : uint8_t {
SYCL_DEVICE_BINARY_TYPE_NONE = 0, // undetermined
SYCL_DEVICE_BINARY_TYPE_NATIVE = 1, // specific to a device
SYCL_DEVICE_BINARY_TYPE_SPIRV = 2,
- SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3
+ SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE = 3,
+ SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE = 4
};
// Device binary descriptor version supported by this library.
diff --git a/sycl/source/detail/compression.hpp b/sycl/source/detail/compression.hpp
new file mode 100644
index 000000000000..1878010cd5ba
--- /dev/null
+++ b/sycl/source/detail/compression.hpp
@@ -0,0 +1,153 @@
+//==---------- compression.hpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+
+#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE
+
+#include <sycl/exception.hpp>
+
+#include <iostream>
+#include <memory>
+#include <zstd.h>
+
+#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
+#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2)
+
+namespace sycl {
+inline namespace _V1 {
+namespace detail {
+
+// Singleton class to handle ZSTD compression and decompression.
+class ZSTDCompressor {
+private:
+ ZSTDCompressor() {}
+
+ ZSTDCompressor(const ZSTDCompressor &) = delete;
+ ZSTDCompressor &operator=(const ZSTDCompressor &) = delete;
+ ~ZSTDCompressor() {}
+
+ // Get the singleton instance of the ZSTDCompressor class.
+ static ZSTDCompressor &GetSingletonInstance() {
+ static ZSTDCompressor instance;
+ return instance;
+ }
+
+ // Public APIs
+public:
+ // Blob (de)compression do not assume format/structure of the input buffer.
+ // This function can be used in future for compression in on-disk cache.
+ static std::unique_ptr<char> CompressBlob(const char *src, size_t srcSize,
+ size_t &dstSize, int level) {
+ auto &instance = GetSingletonInstance();
+
+ // Lazy initialize compression context.
+ if (!instance.m_ZSTD_compression_ctx) {
+
+ // Call ZSTD_createCCtx() and ZSTD_freeCCtx() to create and free the
+ // context.
+ instance.m_ZSTD_compression_ctx =
+ std::unique_ptr<ZSTD_CCtx, size_t (*)(ZSTD_CCtx *)>(ZSTD_createCCtx(),
+ ZSTD_freeCCtx);
+ if (!instance.m_ZSTD_compression_ctx) {
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ "Failed to create ZSTD compression context");
+ }
+ }
+
+ // Get maximum size of the compressed buffer and allocate it.
+ auto dstBufferSize = ZSTD_compressBound(srcSize);
+ auto dstBuffer = std::unique_ptr<char>(new char[dstBufferSize]);
+
+ if (!dstBuffer)
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ "Failed to allocate memory for compressed data");
+
+ // Compress the input buffer.
+ dstSize =
+ ZSTD_compressCCtx(instance.m_ZSTD_compression_ctx.get(),
+ static_cast<void *>(dstBuffer.get()), dstBufferSize,
+ static_cast<const void *>(src), srcSize, level);
+
+ // Store the error code if compression failed.
+ if (ZSTD_isError(dstSize))
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ ZSTD_getErrorName(dstSize));
+
+ // Pass ownership of the buffer to the caller.
+ return dstBuffer;
+ }
+
+ static size_t GetDecompressedSize(const char *src, size_t srcSize) {
+ size_t dstBufferSize = ZSTD_getFrameContentSize(src, srcSize);
+
+ if (dstBufferSize == ZSTD_CONTENTSIZE_UNKNOWN ||
+ dstBufferSize == ZSTD_CONTENTSIZE_ERROR) {
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ "Error determining size of uncompressed data.");
+ }
+ return dstBufferSize;
+ }
+
+ static std::unique_ptr<char> DecompressBlob(const char *src, size_t srcSize,
+ size_t &dstSize) {
+ auto &instance = GetSingletonInstance();
+
+ // Lazy initialize decompression context.
+ if (!instance.m_ZSTD_decompression_ctx) {
+
+ // Call ZSTD_createDCtx() and ZSTD_freeDCtx() to create and free the
+ // context.
+ instance.m_ZSTD_decompression_ctx =
+ std::unique_ptr<ZSTD_DCtx, size_t (*)(ZSTD_DCtx *)>(ZSTD_createDCtx(),
+ ZSTD_freeDCtx);
+ if (!instance.m_ZSTD_decompression_ctx) {
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ "Failed to create ZSTD decompression context");
+ }
+ }
+
+ // Size of decompressed image can be larger than what we can allocate
+ // on heap. In that case, we need to use streaming decompression.
+ auto dstBufferSize = GetDecompressedSize(src, srcSize);
+
+ // Allocate buffer for decompressed data.
+ auto dstBuffer = std::unique_ptr<char>(new char[dstBufferSize]);
+
+ if (!dstBuffer)
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ "Failed to allocate memory for decompressed data");
+
+ dstSize =
+ ZSTD_decompressDCtx(instance.m_ZSTD_decompression_ctx.get(),
+ static_cast<void *>(dstBuffer.get()), dstBufferSize,
+ static_cast<const void *>(src), srcSize);
+
+ // In case of decompression error, return the error message and set dstSize
+ // to 0.
+ if (ZSTD_isError(dstSize)) {
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ ZSTD_getErrorName(dstSize));
+ }
+
+ // Pass ownership of the buffer to the caller.
+ return dstBuffer;
+ }
+
+ // Data fields
+private:
+ // ZSTD contexts. Reusing ZSTD context speeds up subsequent (de)compression.
+ std::unique_ptr<ZSTD_CCtx, size_t (*)(ZSTD_CCtx *)> m_ZSTD_compression_ctx{
+ nullptr, nullptr};
+ std::unique_ptr<ZSTD_DCtx, size_t (*)(ZSTD_DCtx *)> m_ZSTD_decompression_ctx{
+ nullptr, nullptr};
+};
+} // namespace detail
+} // namespace _V1
+} // namespace sycl
+
+#endif // SYCL_RT_ZSTD_NOT_AVAIABLE
diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp
index beb9bae0dd0f..2be48d4a38fc 100644
--- a/sycl/source/detail/device_binary_image.cpp
+++ b/sycl/source/detail/device_binary_image.cpp
@@ -9,6 +9,9 @@
#include <detail/device_binary_image.hpp>
#include <sycl/detail/ur.hpp>
+// For device image compression.
+#include <detail/compression.hpp>
+
#include <algorithm>
#include <cstring>
#include <memory>
@@ -167,6 +170,8 @@ void RTDeviceBinaryImage::init(sycl_device_binary Bin) {
// it when invoking the offload wrapper job
Format = static_cast<ur::DeviceBinaryType>(Bin->Format);
+ // For compressed images, we delay determining the format until the image is
+ // decompressed.
if (Format == SYCL_DEVICE_BINARY_TYPE_NONE)
// try to determine the format; may remain "NONE"
Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize());
@@ -226,6 +231,48 @@ DynRTDeviceBinaryImage::~DynRTDeviceBinaryImage() {
Bin = nullptr;
}
+#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE
+CompressedRTDeviceBinaryImage::CompressedRTDeviceBinaryImage(
+ sycl_device_binary CompressedBin)
+ : RTDeviceBinaryImage() {
+
+ // 'CompressedBin' is part of the executable image loaded into memory
+ // which can't be modified easily. So, we need to make a copy of it.
+ Bin = new sycl_device_binary_struct(*CompressedBin);
+
+ // Get the decompressed size of the binary image.
+ m_ImageSize = ZSTDCompressor::GetDecompressedSize(
+ reinterpret_cast<const char *>(Bin->BinaryStart),
+ static_cast<size_t>(Bin->BinaryEnd - Bin->BinaryStart));
+
+ init(Bin);
+}
+
+void CompressedRTDeviceBinaryImage::Decompress() {
+
+ size_t CompressedDataSize =
+ static_cast<size_t>(Bin->BinaryEnd - Bin->BinaryStart);
+
+ size_t DecompressedSize = 0;
+ m_DecompressedData = ZSTDCompressor::DecompressBlob(
+ reinterpret_cast<const char *>(Bin->BinaryStart), CompressedDataSize,
+ DecompressedSize);
+
+ Bin->BinaryStart =
+ reinterpret_cast<const unsigned char *>(m_DecompressedData.get());
+ Bin->BinaryEnd = Bin->BinaryStart + DecompressedSize;
+
+ Bin->Format = ur::getBinaryImageFormat(Bin->BinaryStart, getSize());
+ Format = static_cast<ur::DeviceBinaryType>(Bin->Format);
+}
+
+CompressedRTDeviceBinaryImage::~CompressedRTDeviceBinaryImage() {
+ // De-allocate device binary struct.
+ delete Bin;
+ Bin = nullptr;
+}
+#endif // SYCL_RT_ZSTD_NOT_AVAIABLE
+
} // namespace detail
} // namespace _V1
} // namespace sycl
diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp
index 49047a04ae77..203427b89ca4 100644
--- a/sycl/source/detail/device_binary_image.hpp
+++ b/sycl/source/detail/device_binary_image.hpp
@@ -7,12 +7,12 @@
//===----------------------------------------------------------------------===//
#pragma once
+#include "ur_utils.hpp"
#include <detail/compiler.hpp>
#include <sycl/detail/common.hpp>
#include <sycl/detail/os_util.hpp>
#include <sycl/detail/ur.hpp>
#include <ur_api.h>
-#include "ur_utils.hpp"
#include <sycl/detail/iostream_proxy.hpp>
@@ -158,7 +158,10 @@ public:
virtual void print() const;
virtual void dump(std::ostream &Out) const;
- size_t getSize() const {
+ // getSize will be overridden in the case of compressed binary images.
+ // In that case, we return the size of uncompressed data, instead of
+ // BinaryEnd - BinaryStart.
+ virtual size_t getSize() const {
assert(Bin && "binary image data not set");
return static_cast<size_t>(Bin->BinaryEnd - Bin->BinaryStart);
}
@@ -276,6 +279,35 @@ protected:
std::unique_ptr<char[]> Data;
};
+#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE
+// Compressed device binary image. Decompression happens when the image is
+// actually used to build a program.
+// Also, frees the decompressed data in destructor.
+class CompressedRTDeviceBinaryImage : public RTDeviceBinaryImage {
+public:
+ CompressedRTDeviceBinaryImage(sycl_device_binary Bin);
+ ~CompressedRTDeviceBinaryImage() override;
+
+ void Decompress();
+
+ // We return the size of decompressed data, not the size of compressed data.
+ size_t getSize() const override {
+ assert(Bin && "binary image data not set");
+ return m_ImageSize;
+ }
+
+ bool IsCompressed() const { return m_DecompressedData.get() == nullptr; }
+ void print() const override {
+ RTDeviceBinaryImage::print();
+ std::cerr << " COMPRESSED\n";
+ }
+
+private:
+ std::unique_ptr<char> m_DecompressedData;
+ size_t m_ImageSize;
+};
+#endif // SYCL_RT_ZSTD_NOT_AVAIABLE
+
} // namespace detail
} // namespace _V1
} // namespace sycl
diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp
index 01e567fb03c3..e7357a45d117 100644
--- a/sycl/source/detail/program_manager/program_manager.cpp
+++ b/sycl/source/detail/program_manager/program_manager.cpp
@@ -174,6 +174,8 @@ isDeviceBinaryTypeSupported(const context &C,
return "SPIR-V";
case SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE:
return "LLVM IR";
+ case SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE:
+ return "compressed none";
}
assert(false && "Unknown device image format");
return "unknown";
@@ -721,6 +723,14 @@ setSpecializationConstants(const std::shared_ptr<device_image_impl> &InputImpl,
}
}
+static inline void CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) {
+#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE
+ if (auto CompImg = dynamic_cast<CompressedRTDeviceBinaryImage *>(Img))
+ if (CompImg->IsCompressed())
+ CompImg->Decompress();
+#endif
+}
+
// When caching is enabled, the returned UrProgram will already have
// its ref count incremented.
ur_program_handle_t ProgramManager::getBuiltURProgram(
@@ -773,6 +783,10 @@ ur_program_handle_t ProgramManager::getBuiltURProgram(
collectDeviceImageDepsForImportedSymbols(Img, Device);
DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end());
+ // Decompress all DeviceImagesToLink
+ for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink)
+ CheckAndDecompressImage(BinImg);
+
std::vector<const RTDeviceBinaryImage *> AllImages;
AllImages.reserve(ImageDeps.size() + 1);
AllImages.push_back(&Img);
@@ -1317,6 +1331,10 @@ ProgramManager::getDeviceImage(const std::string &KernelName,
Device);
}
}
+
+ // Decompress the image if it is compressed.
+ CheckAndDecompressImage(Img);
+
if (Img) {
CheckJITCompilationForImage(Img, JITCompilationIsRequired);
@@ -1458,6 +1476,13 @@ getDeviceLibPrograms(const ContextImplPtr Context,
return Programs;
}
+// Check if device image is compressed.
+static inline bool isDeviceImageCompressed(sycl_device_binary Bin) {
+
+ auto currFormat = static_cast<ur::DeviceBinaryType>(Bin->Format);
+ return currFormat == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE;
+}
+
ProgramManager::ProgramPtr ProgramManager::build(
ProgramPtr Program, const ContextImplPtr Context,
const std::string &CompileOptions, const std::string &LinkOptions,
@@ -1583,7 +1608,19 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) {
if (EntriesB == EntriesE)
continue;
- auto Img = std::make_unique<RTDeviceBinaryImage>(RawImg);
+ std::unique_ptr<RTDeviceBinaryImage> Img;
+ if (isDeviceImageCompressed(RawImg))
+#ifndef SYCL_RT_ZSTD_NOT_AVAIABLE
+ Img = std::make_unique<CompressedRTDeviceBinaryImage>(RawImg);
+#else
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ "Recieved a compressed device image, but "
+ "SYCL RT was built without ZSTD support."
+ "Aborting. ");
+#endif
+ else
+ Img = std::make_unique<RTDeviceBinaryImage>(RawImg);
+
static uint32_t SequenceID = 0;
// Fill the kernel argument mask map
@@ -1620,6 +1657,10 @@ void ProgramManager::addImages(sycl_device_binaries DeviceBinary) {
[&](auto &CurrentImg) {
return CurrentImg.first->getFormat() == Img->getFormat();
});
+
+ // Check if image is compressed, and decompress it before dumping.
+ CheckAndDecompressImage(Img.get());
+
dumpImage(*Img, NeedsSequenceID ? ++SequenceID : 0);
}
@@ -2097,6 +2138,9 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
auto &[KernelImagesState, KernelImages] = *StateImagesPair;
+ // Check if device image is compressed and decompress it if needed
+ CheckAndDecompressImage(BinImage);
+
if (KernelImages.empty()) {
KernelImagesState = ImgState;
KernelImages.push_back(BinImage);
diff --git a/sycl/test-e2e/Compression/Inputs/single_kernel.cpp b/sycl/test-e2e/Compression/Inputs/single_kernel.cpp
new file mode 100644
index 000000000000..eac8a63438f8
--- /dev/null
+++ b/sycl/test-e2e/Compression/Inputs/single_kernel.cpp
@@ -0,0 +1,17 @@
+#include <sycl/detail/core.hpp>
+
+int main() {
+
+ sycl::queue q0;
+ int val = -1;
+ {
+ sycl::buffer<int, 1> buffer1(&val, sycl::range(1));
+
+ q0.submit([&](sycl::handler &cgh) {
+ auto acc = sycl::accessor(buffer1, cgh);
+ cgh.single_task([=] { acc[0] = acc[0] + 1; });
+ }).wait();
+ }
+
+ return !(val == 0);
+}
diff --git a/sycl/test-e2e/Compression/compression.cpp b/sycl/test-e2e/Compression/compression.cpp
new file mode 100644
index 000000000000..1d8da7abc9d4
--- /dev/null
+++ b/sycl/test-e2e/Compression/compression.cpp
@@ -0,0 +1,7 @@
+// End-to-End test for testing device image compression.
+// REQUIRES: zstd
+// RUN: %{build} -O0 -g %S/Inputs/single_kernel.cpp -o %t_not_compress.out
+// RUN: %{build} -O0 -g --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out
+// RUN: %{run} %t_not_compress.out
+// RUN: %{run} %t_compress.out
+// RUN: not diff %t_not_compress.out %t_compress.out
diff --git a/sycl/test-e2e/Compression/compression_aot.cpp b/sycl/test-e2e/Compression/compression_aot.cpp
new file mode 100644
index 000000000000..5b44b6a41e9c
--- /dev/null
+++ b/sycl/test-e2e/Compression/compression_aot.cpp
@@ -0,0 +1,5 @@
+// End-to-End test for testing device image compression in AOT.
+// REQUIRES: zstd, opencl-aot, cpu
+
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -O0 --offload-compress --offload-compression-level=3 %S/Inputs/single_kernel.cpp -o %t_compress.out
+// RUN: %{run} %t_compress.out
diff --git a/sycl/test-e2e/Compression/compression_multiple_tu.cpp b/sycl/test-e2e/Compression/compression_multiple_tu.cpp
new file mode 100644
index 000000000000..72eb3f090479
--- /dev/null
+++ b/sycl/test-e2e/Compression/compression_multiple_tu.cpp
@@ -0,0 +1,56 @@
+// End-to-End test for testing device image compression when we have two
+// translation units, one compressed and one not compressed.
+// REQUIRES: zstd, linux
+
+// RUN: %{build} --offload-compress -DENABLE_KERNEL1 -shared -fPIC -o %T/kernel1.so
+// RUN: %{build} -DENABLE_KERNEL2 -shared -fPIC -o %T/kernel2.so
+
+// RUN: %{build} %T/kernel1.so %T/kernel2.so -o %t_compress.out
+// RUN: %{run} %t_compress.out
+#if defined(ENABLE_KERNEL1) || defined(ENABLE_KERNEL2)
+#include <sycl/builtins.hpp>
+#include <sycl/detail/core.hpp>
+using namespace sycl;
+#endif
+
+#ifdef ENABLE_KERNEL1
+void kernel1() {
+ int data = -1;
+ {
+ buffer<int> b(&data, range(1));
+ queue q;
+ q.submit([&](sycl::handler &cgh) {
+ auto acc = accessor(b, cgh);
+ cgh.single_task([=] { acc[0] = abs(acc[0]); });
+ });
+ }
+ assert(data == 1);
+}
+#endif
+
+#ifdef ENABLE_KERNEL2
+void kernel2() {
+ int data = -2;
+ {
+ buffer<int> b(&data, range(1));
+ queue q;
+ q.submit([&](sycl::handler &cgh) {
+ auto acc = accessor(b, cgh);
+ cgh.single_task([=] { acc[0] = abs(acc[0]); });
+ });
+ }
+ assert(data == 2);
+}
+#endif
+
+#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2)
+void kernel1();
+void kernel2();
+
+int main() {
+ kernel1();
+ kernel2();
+
+ return 0;
+}
+#endif
diff --git a/sycl/test-e2e/Compression/compression_separate_compile.cpp b/sycl/test-e2e/Compression/compression_separate_compile.cpp
new file mode 100644
index 000000000000..9e47bbebdc87
--- /dev/null
+++ b/sycl/test-e2e/Compression/compression_separate_compile.cpp
@@ -0,0 +1,70 @@
+// End-to-End test for testing device image compression when we
+// seperatly compile and link device images.
+
+// REQUIRES: zstd, opencl-aot, cpu, linux
+
+////////////////////// Compile device images
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL1' -DENABLE_KERNEL1 -c %s -o %t_kernel1_aot.o
+// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 -fsycl-host-compiler=clang++ -fsycl-host-compiler-options='-std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -DENABLE_KERNEL2' -DENABLE_KERNEL2 -c %s -o %t_kernel2_aot.o
+
+////////////////////// Link device images
+// RUN: %clangxx --offload-compress -fsycl -fsycl-link -fsycl-targets=spir64_x86_64 -fPIC %t_kernel1_aot.o %t_kernel2_aot.o -o %t_compressed_image.o -v
+
+////////////////////// Compile the host program
+// RUN: %clangxx -fsycl -std=c++17 -Wno-attributes -Wno-deprecated-declarations -fPIC -c %s -o %t_main.o
+
+////////////////////// Link the host program and compressed device images
+// RUN: %clangxx -fsycl %t_main.o %t_kernel1_aot.o %t_kernel2_aot.o %t_compressed_image.o -o %t_compress.out
+
+// RUN: %{run} %t_compress.out
+
+#include <sycl/detail/core.hpp>
+
+using namespace sycl;
+
+// Kernel 1
+#ifdef ENABLE_KERNEL1
+class test_kernel1;
+void run_kernel1(int *a, queue q) {
+ q.single_task<test_kernel1>([=]() { *a *= 3; }).wait();
+}
+#endif
+
+// Kernel 2
+#ifdef ENABLE_KERNEL2
+class test_kernel2;
+void run_kernel2(int *a, queue q) {
+ q.single_task<test_kernel2>([=]() { *a += 42; }).wait();
+}
+#endif
+
+// Main application.
+#if not defined(ENABLE_KERNEL1) && not defined(ENABLE_KERNEL2)
+#include <sycl/properties/all_properties.hpp>
+#include <sycl/usm.hpp>
+
+#include <iostream>
+
+class kernel_init;
+void run_kernel1(int *a, queue q);
+void run_kernel2(int *a, queue q);
+int main() {
+ int retCode = 0;
+ queue q;
+
+ if (!q.get_device().get_info<info::device::usm_shared_allocations>())
+ return 0;
+
+ int *p = malloc_shared<int>(1, q);
+ *p = 42;
+
+ run_kernel1(p, q);
+ run_kernel2(p, q);
+ q.wait();
+
+ retCode = *p != (42 * 3 + 42);
+
+ free(p, q);
+ return retCode;
+}
+#endif
diff --git a/sycl/test-e2e/Compression/no_zstd_warning.cpp b/sycl/test-e2e/Compression/no_zstd_warning.cpp
new file mode 100644
index 000000000000..8a4460f9b864
--- /dev/null
+++ b/sycl/test-e2e/Compression/no_zstd_warning.cpp
@@ -0,0 +1,4 @@
+// using --offload-compress without zstd should throw an error.
+// REQUIRES: !zstd
+// RUN: not %{build} -O0 -g --offload-compress %S/Inputs/single_kernel.cpp -o %t_compress.out 2>&1 | FileCheck %s
+// CHECK: '--offload-compress' option is specified but zstd is not available. The device image will not be compressed.
diff --git a/sycl/test-e2e/lit.cfg.py b/sycl/test-e2e/lit.cfg.py
index c3a68f45bfef..961bc79307ac 100644
--- a/sycl/test-e2e/lit.cfg.py
+++ b/sycl/test-e2e/lit.cfg.py
@@ -295,6 +295,18 @@ sp = subprocess.getstatusoutput(
if sp[0] == 0:
config.available_features.add("preview-breaking-changes-supported")
+# Check if clang is built with ZSTD and compression support.
+fPIC_opt = "-fPIC" if platform.system() != "Windows" else ""
+ps = subprocess.Popen(
+ [config.dpcpp_compiler, "-fsycl", "--offload-compress", "-shared", fPIC_opt, "-x", "c++", "-", "-o", "-"],
+ stdin=subprocess.PIPE,
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.PIPE,
+)
+op = ps.communicate(input=b"")
+if ps.wait() == 0:
+ config.available_features.add("zstd")
+
# Check for CUDA SDK
check_cuda_file = "cuda_include.cpp"
with open_check_file(check_cuda_file) as fp:
diff --git a/sycl/unittests/CMakeLists.txt b/sycl/unittests/CMakeLists.txt
index c0e04bfaf119..9ae7f4d79fb6 100644
--- a/sycl/unittests/CMakeLists.txt
+++ b/sycl/unittests/CMakeLists.txt
@@ -52,6 +52,12 @@ add_subdirectory(accessor)
add_subdirectory(handler)
add_subdirectory(builtins)
add_subdirectory(buffer/l0_specific)
+
+# Enable compression unit-tests only if zstd is present.
+if (LLVM_ENABLE_ZSTD)
+ add_subdirectory(compression)
+endif()
+
# TODO Enable xpti tests for Windows
if (NOT WIN32)
add_subdirectory(xpti_trace)
diff --git a/sycl/unittests/compression/CMakeLists.txt b/sycl/unittests/compression/CMakeLists.txt
new file mode 100644
index 000000000000..742e2d228072
--- /dev/null
+++ b/sycl/unittests/compression/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_sycl_unittest(CompressionTests OBJECT
+ CompressionTests.cpp
+)
diff --git a/sycl/unittests/compression/CompressionTests.cpp b/sycl/unittests/compression/CompressionTests.cpp
new file mode 100644
index 000000000000..e9b50fa1cc2e
--- /dev/null
+++ b/sycl/unittests/compression/CompressionTests.cpp
@@ -0,0 +1,80 @@
+//==------- CompressionTests.cpp --- compression unit test ----------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <detail/compression.hpp>
+
+#include <string>
+
+#include <gtest/gtest.h>
+
+using namespace sycl::detail;
+
+TEST(CompressionTest, SimpleCompression) {
+
+ // Data to compress.
+ std::string data = "Hello World! I'm about to get compressed :P";
+ size_t compressedDataSize = 0;
+
+ auto compressedData = ZSTDCompressor::CompressBlob(
+ data.c_str(), data.size(), compressedDataSize, /*Compression level*/ 3);
+
+ // Check if compression was successful.
+ EXPECT_NE(compressedData, nullptr);
+ EXPECT_GT(compressedDataSize, (size_t)0);
+
+ // Decompress the data.
+ size_t decompressedSize = 0;
+ auto decompressedData = ZSTDCompressor::DecompressBlob(
+ compressedData.get(), compressedDataSize, decompressedSize);
+
+ ASSERT_NE(decompressedData, nullptr);
+ ASSERT_GT(decompressedSize, (size_t)0);
+
+ // Check if decompressed data is same as original data.
+ std::string decompressedStr((char *)decompressedData.get(), decompressedSize);
+ ASSERT_EQ(data, decompressedStr);
+}
+
+// Test getting error code and error string.
+// Intentionally give incorrect input to decompress
+// to trigger an error.
+TEST(CompressionTest, NegativeErrorTest) {
+ std::string input = "Hello, World!";
+ size_t decompressedSize = 0;
+ bool threwException = false;
+ try {
+ auto compressedData = ZSTDCompressor::DecompressBlob(
+ input.c_str(), input.size(), decompressedSize);
+ } catch (...) {
+ threwException = true;
+ }
+
+ ASSERT_TRUE(threwException);
+}
+
+// Test passing empty input to (de)compress.
+// There should be no error and the output should be empty.
+TEST(CompressionTest, EmptyInputTest) {
+ std::string input = "";
+ size_t compressedSize = 0;
+ auto compressedData = ZSTDCompressor::CompressBlob(
+ input.c_str(), input.size(), compressedSize, 1);
+
+ ASSERT_NE(compressedData, nullptr);
+ ASSERT_GT(compressedSize, (size_t)0);
+
+ size_t decompressedSize = 0;
+ auto decompressedData = ZSTDCompressor::DecompressBlob(
+ compressedData.get(), compressedSize, decompressedSize);
+
+ ASSERT_NE(decompressedData, nullptr);
+ ASSERT_EQ(decompressedSize, (size_t)0);
+
+ std::string decompressedStr((char *)decompressedData.get(), decompressedSize);
+ ASSERT_EQ(input, decompressedStr);
+}

View File

@@ -1,29 +1,28 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7e27816..bd34055 100644
index ca0746f..75d2890 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -81,13 +81,6 @@ if(MSVC)
@@ -92,13 +92,6 @@ if(MSVC)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /DYNAMICBASE")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /DYNAMICBASE")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /guard:cf")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/DYNAMICBASE")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/guard:cf")
- # enable Spectre Mitigation, not supported by clang-cl
- if(NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang)
- if((NOT CMAKE_CXX_COMPILER_ID STREQUAL Clang) AND (NOT CMAKE_CXX_COMPILER_ID STREQUAL IntelLLVM))
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Qspectre")
- endif()
- if(NOT CMAKE_C_COMPILER_ID STREQUAL Clang)
- if((NOT CMAKE_C_COMPILER_ID STREQUAL Clang) AND NOT (CMAKE_C_COMPILER_ID STREQUAL IntelLLVM))
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Qspectre")
- endif()
endif()
#CXX compiler support
@@ -128,7 +121,9 @@ if(MSVC)
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
# enable CET shadow stack
- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /CETCOMPAT")
+ if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES ARM64)
+ set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /CETCOMPAT")
+ endif()
@@ -142,9 +135,6 @@ if(MSVC)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/DEBUG ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/OPT:REF ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/OPT:ICF")
- # enable CET shadow stack
- set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_CXX_LINKER_WRAPPER_FLAG}/CETCOMPAT")
-
#Use of sccache with MSVC requires workaround of replacing /Zi with /Z7
#https://github.com/mozilla/sccache
if(USE_Z7) #sccache

View File

@@ -0,0 +1,13 @@
# this otherwise leads to error C1258 due to too long path for XML log files
diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake
index 1d3e175..48770c4 100644
--- a/cmake/helpers.cmake
+++ b/cmake/helpers.cmake
@@ -229,7 +229,6 @@ function(add_umf_target_compile_options name)
${name}
PRIVATE /MD$<$<CONFIG:Debug>:d>
$<$<CONFIG:Release>:/sdl>
- /analyze
/DYNAMICBASE
/W4
/Gy

View File

@@ -0,0 +1,13 @@
# this is needed in order to be able to push textures to the host memory
diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp
index f68b2d93..90182568 100644
--- a/source/adapters/level_zero/image.cpp
+++ b/source/adapters/level_zero/image.cpp
@@ -457,6 +457,7 @@ ur_result_t bindlessImagesCreateImpl(ur_context_handle_t hContext,
ZE2UR_CALL(zeContextMakeImageResident,
(hContext->ZeContext, hDevice->ZeDevice, ZeImage));
} else if (MemAllocProperties.type == ZE_MEMORY_TYPE_DEVICE ||
+ MemAllocProperties.type == ZE_MEMORY_TYPE_HOST ||
MemAllocProperties.type == ZE_MEMORY_TYPE_SHARED) {
ZeStruct<ze_image_pitched_exp_desc_t> PitchedDesc;
PitchedDesc.ptr = reinterpret_cast<void *>(hImageMem);

View File

@@ -424,9 +424,9 @@ if(DEFINED LIBDIR)
${SYCL_ROOT_DIR}/lib/libsycl.so.*
${SYCL_ROOT_DIR}/lib/libpi_*.so
${SYCL_ROOT_DIR}/lib/libur_*.so
${SYCL_ROOT_DIR}/lib/libur_*.so.*
)
list(FILTER _sycl_runtime_libraries EXCLUDE REGEX ".*\.py")
list(REMOVE_ITEM _sycl_runtime_libraries "${SYCL_ROOT_DIR}/lib/libpi_opencl.so")
list(APPEND PLATFORM_BUNDLED_LIBRARIES ${_sycl_runtime_libraries})
unset(_sycl_runtime_libraries)
endif()

View File

@@ -1040,11 +1040,19 @@ if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
)
if(EMBREE_SYCL_SUPPORT)
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree4_sycl.lib
debug ${LIBDIR}/embree/lib/embree4_sycl_d.lib
)
# MSVC debug version of embree may have been compiled without SYCL support
if(EXISTS ${LIBDIR}/embree/lib/embree4_sycl_d.lib)
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree4_sycl.lib
debug ${LIBDIR}/embree/lib/embree4_sycl_d.lib
)
else()
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree4_sycl.lib
)
endif()
endif()
if(EMBREE_STATIC_LIB)
@@ -1069,11 +1077,19 @@ if(WITH_CYCLES AND WITH_CYCLES_EMBREE)
)
if(EMBREE_SYCL_SUPPORT)
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree_rthwif.lib
debug ${LIBDIR}/embree/lib/embree_rthwif_d.lib
)
# MSVC debug version of embree may have been compiled without SYCL support
if(EXISTS ${LIBDIR}/embree/lib/embree_rthwif_d.lib)
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree_rthwif.lib
debug ${LIBDIR}/embree/lib/embree_rthwif_d.lib
)
else()
set(EMBREE_LIBRARIES
${EMBREE_LIBRARIES}
optimized ${LIBDIR}/embree/lib/embree_rthwif.lib
)
endif()
endif()
endif()
endif()