From 26ed4d38925c2061cfa697266a1704d72b11c75a Mon Sep 17 00:00:00 2001 From: "Sahar A. Kashi" Date: Tue, 24 Sep 2024 14:35:24 +0200 Subject: [PATCH] Cycles: Linux Support for HIP-RT This change switches Cycles to an open-source HIP-RT library which implements hardware ray-tracing. This library is now used on both Windows and Linux. While there should be no noticeable changes on Windows, on Linux this adds support for hardware ray-tracing on AMD GPUs. The majority of the change is typical platform code to add a new library to the dependency builder, and a change in how ahead-of-time (AoT) kernels are compiled. There are changes in Cycles itself, but they are rather straightforward: some APIs changed in the open-source version of the library. There are a couple of extra files which are needed for this to work: hiprt02003_6.1_amd.hipfb and oro_compiled_kernels.hipfb. The HIP-RT library makes some assumptions about where these files are located. Currently they follow the same rule as AoT kernels for oneAPI: - On Windows they are next to blender.exe - On Linux they are in the lib/ folder Performance comparison on Ubuntu 22.04.5: ``` GPU: AMD Radeon PRO W7800 Driver: amdgpu-install_6.1.60103-1_all.deb main hip-rt attic 0.1414s 0.0932s barbershop_interior 0.1563s 0.1258s bistro 0.2134s 0.1597s bmw27 0.0119s 0.0099s classroom 0.1006s 0.0803s fishy_cat 0.0248s 0.0178s junkshop 0.0916s 0.0713s koro 0.0589s 0.0720s monster 0.0435s 0.0385s pabellon 0.0543s 0.0391s sponza 0.0223s 0.0180s spring 0.1026s 1.5145s victor 0.1901s 0.1239s wdas_cloud 0.1153s 0.1125s ``` Co-authored-by: Brecht Van Lommel Co-authored-by: Ray Molenkamp Co-authored-by: Sergey Sharybin Pull Request: https://projects.blender.org/blender/blender/pulls/121050 --- CMakeLists.txt | 9 +- .../buildbot/config/blender_linux.cmake | 2 + .../buildbot/config/blender_windows.cmake | 2 + build_files/cmake/Modules/FindHIP.cmake | 21 +- build_files/cmake/Modules/FindHIPRT.cmake | 27 +-- 
.../cmake/config/blender_release.cmake | 5 +- .../cmake/platform/platform_unix.cmake | 2 + extern/hipew/include/hiprtew.h | 112 +++++++-- extern/hipew/src/hipew.c | 1 - extern/hipew/src/hiprtew.cc | 13 +- intern/cycles/blender/addon/properties.py | 7 +- intern/cycles/cmake/external_libs.cmake | 5 +- intern/cycles/device/hiprt/device_impl.cpp | 104 +++++--- intern/cycles/device/hiprt/device_impl.h | 4 +- intern/cycles/device/hiprt/queue.cpp | 21 +- intern/cycles/device/queue.h | 1 + intern/cycles/kernel/CMakeLists.txt | 223 ++++++++++++++---- intern/cycles/kernel/device/hiprt/bvh.h | 2 +- intern/cycles/kernel/device/hiprt/common.h | 58 ++--- intern/cycles/kernel/device/hiprt/globals.h | 8 +- .../kernel/device/hiprt/hiprt_kernels.h | 14 +- lib/linux_x64 | 2 +- lib/windows_x64 | 2 +- source/creator/CMakeLists.txt | 11 + 24 files changed, 463 insertions(+), 193 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1af183fcd49..be34333785c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -719,11 +719,8 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")) mark_as_advanced(WITH_CYCLES_DEVICE_HIP) mark_as_advanced(CYCLES_HIP_BINARIES_ARCH) - # HIPRT is only available on Windows for now. 
- if(WIN32) - option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF) - mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT) - endif() + option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF) + mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT) endif() # Apple Metal @@ -2825,8 +2822,6 @@ if(FIRST_RUN) info_cfg_option(WITH_CYCLES_ONEAPI_BINARIES) info_cfg_option(WITH_CYCLES_DEVICE_HIP) info_cfg_option(WITH_CYCLES_HIP_BINARIES) - endif() - if(WIN32) info_cfg_option(WITH_CYCLES_DEVICE_HIPRT) endif() endif() diff --git a/build_files/buildbot/config/blender_linux.cmake b/build_files/buildbot/config/blender_linux.cmake index f3f10b6d382..b33ca01791a 100644 --- a/build_files/buildbot/config/blender_linux.cmake +++ b/build_files/buildbot/config/blender_linux.cmake @@ -13,3 +13,5 @@ message(STATUS "Building in Rocky 8 Linux 64bit environment") set(WITH_DOC_MANPAGE OFF CACHE BOOL "" FORCE) set(WITH_CYCLES_TEST_OSL ON CACHE BOOL "" FORCE) + +set(HIPRT_COMPILER_PARALLEL_JOBS 4 CACHE STRING "" FORCE) diff --git a/build_files/buildbot/config/blender_windows.cmake b/build_files/buildbot/config/blender_windows.cmake index cb8d173151e..19c3e0decf3 100644 --- a/build_files/buildbot/config/blender_windows.cmake +++ b/build_files/buildbot/config/blender_windows.cmake @@ -5,3 +5,5 @@ include("${CMAKE_CURRENT_LIST_DIR}/../../cmake/config/blender_release.cmake") set(WITH_CYCLES_TEST_OSL ON CACHE BOOL "" FORCE) + +set(HIPRT_COMPILER_PARALLEL_JOBS 4 CACHE STRING "" FORCE) \ No newline at end of file diff --git a/build_files/cmake/Modules/FindHIP.cmake b/build_files/cmake/Modules/FindHIP.cmake index cf2e21bd37f..008666cabcc 100644 --- a/build_files/cmake/Modules/FindHIP.cmake +++ b/build_files/cmake/Modules/FindHIP.cmake @@ -37,18 +37,22 @@ find_program(HIP_HIPCC_EXECUTABLE ) if(WIN32) - # Needed for HIP-RT on Windows. 
- find_program(HIP_LINKER_EXECUTABLE - NAMES - clang++ - HINTS - ${_hip_SEARCH_DIRS} + set(LINKER clang++) +else() + set(LINKER amdclang++) +endif() + +find_program(HIP_LINKER_EXECUTABLE + NAMES + ${LINKER} + HINTS + ${_hip_SEARCH_DIRS} PATH_SUFFIXES bin NO_DEFAULT_PATH NO_CMAKE_PATH - ) -endif() +) + if(HIP_HIPCC_EXECUTABLE) set(HIP_VERSION_MAJOR 0) @@ -95,6 +99,7 @@ if(HIP_HIPCC_EXECUTABLE) # Construct full semantic version. set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}") + set(HIP_VERSION_SHORT "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}") unset(_hip_version_raw) unset(_hipcc_executable) endif() diff --git a/build_files/cmake/Modules/FindHIPRT.cmake b/build_files/cmake/Modules/FindHIPRT.cmake index d57784e5531..7843ad833b8 100644 --- a/build_files/cmake/Modules/FindHIPRT.cmake +++ b/build_files/cmake/Modules/FindHIPRT.cmake @@ -4,7 +4,6 @@ # Find HIPRT SDK. This module defines: # HIPRT_INCLUDE_DIR, path to HIPRT include directory -# HIPRT_BITCODE, bitcode file with ray-tracing functionality # HIPRT_FOUND, if SDK found if(NOT (DEFINED HIPRT_ROOT_DIR)) @@ -23,36 +22,32 @@ endif() set(_hiprt_SEARCH_DIRS ${HIPRT_ROOT_DIR} + /opt/lib/hiprt ) find_path(HIPRT_INCLUDE_DIR NAMES hiprt/hiprt.h HINTS - ${_hiprt_SEARCH_DIRS}/include ${_hiprt_SEARCH_DIRS} + PATH_SUFFIXES + include ) +set(HIPRT_VERSION) + if(HIPRT_INCLUDE_DIR) file(STRINGS "${HIPRT_INCLUDE_DIR}/hiprt/hiprt.h" _hiprt_version REGEX "^#define HIPRT_VERSION_STR[ \t]\".*\"$") - string(REGEX MATCHALL "[0-9]+[.0-9]+" _hiprt_version ${_hiprt_version}) - - find_file(HIPRT_BITCODE - NAMES - hiprt${_hiprt_version}_amd_lib_win.bc - HINTS - ${HIPRT_ROOT_DIR}/bin - ${HIPRT_ROOT_DIR}/dist/bin/Release - NO_DEFAULT_PATH - ) - - unset(_hiprt_version) + string(REGEX MATCHALL "[0-9]+[.0-9]+" HIPRT_VERSION ${_hiprt_version}) endif() +unset(_hiprt_version) + include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(HIPRT DEFAULT_MSG - HIPRT_INCLUDE_DIR HIPRT_BITCODE) 
+find_package_handle_standard_args(HIPRT + REQUIRED_VARS HIPRT_INCLUDE_DIR HIP_LINKER_EXECUTABLE + FAIL_MESSAGE "HIP-RT or one of its dependencies not found") mark_as_advanced( HIPRT_INCLUDE_DIR diff --git a/build_files/cmake/config/blender_release.cmake b/build_files/cmake/config/blender_release.cmake index 4af6a307497..1cc483ffc51 100644 --- a/build_files/cmake/config/blender_release.cmake +++ b/build_files/cmake/config/blender_release.cmake @@ -90,6 +90,7 @@ if(NOT APPLE) # Can't use CMAKE_SYSTEM_PROCESSOR here as it's not set yet, # so fall back to checking the env for vcvarsall's VSCMD_ARG_TGT_ARCH if(NOT (WIN32 AND "$ENV{VSCMD_ARG_TGT_ARCH}" STREQUAL "arm64")) + set(WITH_CYCLES_DEVICE_HIPRT ON CACHE BOOL "" FORCE) set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE) set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE) set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE) @@ -97,7 +98,3 @@ if(NOT APPLE) set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE) endif() endif() - -if(WIN32 AND NOT (WIN32 AND "$ENV{VSCMD_ARG_TGT_ARCH}" STREQUAL "arm64")) - set(WITH_CYCLES_DEVICE_HIPRT ON CACHE BOOL "" FORCE) -endif() diff --git a/build_files/cmake/platform/platform_unix.cmake b/build_files/cmake/platform/platform_unix.cmake index b8c20e2106e..f6fd9f252db 100644 --- a/build_files/cmake/platform/platform_unix.cmake +++ b/build_files/cmake/platform/platform_unix.cmake @@ -629,6 +629,8 @@ if(DEFINED LIBDIR) without_system_libs_end() endif() +add_bundled_libraries(hiprt/lib) + # ---------------------------------------------------------------------------- # Build and Link Flags diff --git a/extern/hipew/include/hiprtew.h b/extern/hipew/include/hiprtew.h index 5f967e7c99c..d5af1dbcf7e 100644 --- a/extern/hipew/include/hiprtew.h +++ b/extern/hipew/include/hiprtew.h @@ -20,63 +20,141 @@ #include #define HIPRT_MAJOR_VERSION 2 -#define HIPRT_MINOR_VERSION 0 -#define HIPRT_PATCH_VERSION 0xb68861 +#define HIPRT_MINOR_VERSION 3 +#define HIPRT_PATCH_VERSION 0x7df94af -#define 
HIPRT_API_VERSION 2000 -#define HIPRT_VERSION_STR "02000" +#define HIPRT_API_VERSION 2003 +#define HIPRT_VERSION_STR "02003" +#define HIP_VERSION_STR "6.0" + +#ifdef _WIN32 +#define HIPRTAPI __stdcall +#else +#define HIPRTAPI +#define HIP_CB +#endif typedef unsigned int hiprtuint32_t; /* Function types. */ typedef hiprtError(thiprtCreateContext)(hiprtuint32_t hiprtApiVersion, - hiprtContextCreationInput &input, + const hiprtContextCreationInput &input, hiprtContext *outContext); typedef hiprtError(thiprtDestroyContext)(hiprtContext context); typedef hiprtError(thiprtCreateGeometry)(hiprtContext context, - const hiprtGeometryBuildInput *buildInput, - const hiprtBuildOptions *buildOptions, - hiprtGeometry *outGeometry); + const hiprtGeometryBuildInput &buildInput, + const hiprtBuildOptions buildOptions, + hiprtGeometry &outGeometry); typedef hiprtError(thiprtDestroyGeometry)(hiprtContext context, hiprtGeometry outGeometry); +typedef hiprtError(thiprtCreateGeometries)(hiprtContext context, + uint32_t numGeometries, + const hiprtGeometryBuildInput *buildInput, + const hiprtBuildOptions buildOptions, + hiprtGeometry **outGeometries); +typedef hiprtError(thiprtDestroyGeometries)(hiprtContext context, uint32_t numGeometries, + hiprtGeometry* outGeometry); + typedef hiprtError(thiprtBuildGeometry)(hiprtContext context, + hiprtBuildOperation buildOperation, + const hiprtGeometryBuildInput &buildInput, + const hiprtBuildOptions buildOptions, + hiprtDevicePtr temporaryBuffer, + hiprtApiStream stream, + hiprtGeometry outGeometry); + +typedef hiprtError(thiprtBuildGeometries)(hiprtContext context, + uint32_t numGeometries, hiprtBuildOperation buildOperation, const hiprtGeometryBuildInput *buildInput, const hiprtBuildOptions *buildOptions, hiprtDevicePtr temporaryBuffer, hiprtApiStream stream, - hiprtGeometry outGeometry); + hiprtGeometry *outGeometries); + + typedef hiprtError(thiprtGetGeometryBuildTemporaryBufferSize)( hiprtContext context, + const hiprtGeometryBuildInput 
&buildInput, + const hiprtBuildOptions buildOptions, + size_t &outSize); + +typedef hiprtError(thiprtGetGeometriesBuildTemporaryBufferSize)( + hiprtContext context, + uint32_t numGeometries, const hiprtGeometryBuildInput *buildInput, const hiprtBuildOptions *buildOptions, - size_t *outSize); + size_t &outSize); + +typedef hiprtError(thiprtCompactGeometry)( hiprtContext context, hiprtApiStream stream, hiprtGeometry geometryIn, hiprtGeometry& geometryOut); + +typedef hiprtError(thiprtCompactGeometries)( + hiprtContext context, + uint32_t numGeometries, + hiprtApiStream stream, + hiprtGeometry* geometriesIn, + hiprtGeometry** geometriesOut ); + typedef hiprtError(thiprtCreateScene)(hiprtContext context, + const hiprtSceneBuildInput &buildInput, + const hiprtBuildOptions buildOptions, + hiprtScene &outScene); + +typedef hiprtError(thiprtCreateScenes)(hiprtContext context, + uint32_t numScenes, const hiprtSceneBuildInput *buildInput, - const hiprtBuildOptions *buildOptions, - hiprtScene *outScene); + const hiprtBuildOptions buildOptions, + hiprtScene **outScene); + typedef hiprtError(thiprtDestroyScene)(hiprtContext context, hiprtScene outScene); +typedef hiprtError(thiprtDestroyScenes)( hiprtContext context, uint32_t numScenes,hiprtScene *scene ); typedef hiprtError(thiprtBuildScene)(hiprtContext context, + hiprtBuildOperation buildOperation, + const hiprtSceneBuildInput &buildInput, + const hiprtBuildOptions buildOptions, + hiprtDevicePtr temporaryBuffer, + hiprtApiStream stream, + hiprtScene outScene); +typedef hiprtError(thiprtBuildScenes)(hiprtContext context, + uint32_t numScenes, hiprtBuildOperation buildOperation, const hiprtSceneBuildInput *buildInput, const hiprtBuildOptions *buildOptions, hiprtDevicePtr temporaryBuffer, hiprtApiStream stream, - hiprtScene outScene); + hiprtScene *outScene); typedef hiprtError(thiprtGetSceneBuildTemporaryBufferSize)( hiprtContext context, + const hiprtSceneBuildInput &buildInput, + const hiprtBuildOptions buildOptions, + 
size_t &outSize); + +typedef hiprtError(thiprtGetScenesBuildTemporaryBufferSize)( + hiprtContext context, + uint32_t numScenes, const hiprtSceneBuildInput *buildInput, - const hiprtBuildOptions *buildOptions, - size_t *outSize); + const hiprtBuildOptions buildOptions, + size_t &outSize); + +typedef hiprtError(thiprtCompactScene)( hiprtContext context, hiprtApiStream stream, hiprtScene sceneIn, hiprtScene& sceneOut ); + +typedef hiprtError(thiprtCompactScenes)( + hiprtContext context, uint32_t numScenes, hiprtApiStream stream, hiprtScene* scenesIn, hiprtScene** scenesOut ); + typedef hiprtError(thiprtCreateFuncTable)(hiprtContext context, hiprtuint32_t numGeomTypes, hiprtuint32_t numRayTypes, - hiprtFuncTable *outFuncTable); + hiprtFuncTable &outFuncTable); typedef hiprtError(thiprtSetFuncTable)(hiprtContext context, hiprtFuncTable funcTable, hiprtuint32_t geomType, hiprtuint32_t rayType, hiprtFuncDataSet set); + + +typedef hiprtError (thiprtCreateGlobalStackBuffer)(hiprtContext context, const hiprtGlobalStackBufferInput& input, hiprtGlobalStackBuffer& stackBufferOut ); +typedef hiprtError (thiprtDestroyGlobalStackBuffer)( hiprtContext context, hiprtGlobalStackBuffer stackBuffer ); + typedef hiprtError(thiprtDestroyFuncTable)(hiprtContext context, hiprtFuncTable funcTable); typedef void(thiprtSetLogLevel)( hiprtLogLevel level ); @@ -94,6 +172,8 @@ extern thiprtBuildScene *hiprtBuildScene; extern thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize; extern thiprtCreateFuncTable *hiprtCreateFuncTable; extern thiprtSetFuncTable *hiprtSetFuncTable; +extern thiprtCreateGlobalStackBuffer *hiprtCreateGlobalStackBuffer; +extern thiprtDestroyGlobalStackBuffer *hiprtDestroyGlobalStackBuffer; extern thiprtDestroyFuncTable *hiprtDestroyFuncTable; extern thiprtSetLogLevel *hiprtSetLogLevel; diff --git a/extern/hipew/src/hipew.c b/extern/hipew/src/hipew.c index 736168e2e2f..5259f8b2f31 100644 --- a/extern/hipew/src/hipew.c +++ b/extern/hipew/src/hipew.c 
@@ -234,7 +234,6 @@ static int hipewHipInit(void) { #ifdef _WIN32 /* Expected in C:/Windows/System32 or similar, no path needed. */ const char *hip_paths[] = {"amdhip64.dll", "amdhip64_6.dll", NULL}; - #elif defined(__APPLE__) /* Default installation path. */ const char *hip_paths[] = {"", NULL}; diff --git a/extern/hipew/src/hiprtew.cc b/extern/hipew/src/hiprtew.cc index 5844d6466b3..b1db74faa7e 100644 --- a/extern/hipew/src/hiprtew.cc +++ b/extern/hipew/src/hiprtew.cc @@ -40,6 +40,8 @@ thiprtBuildScene *hiprtBuildScene; thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize; thiprtCreateFuncTable *hiprtCreateFuncTable; thiprtSetFuncTable *hiprtSetFuncTable; +thiprtCreateGlobalStackBuffer *hiprtCreateGlobalStackBuffer; +thiprtDestroyGlobalStackBuffer *hiprtDestroyGlobalStackBuffer; thiprtDestroyFuncTable *hiprtDestroyFuncTable; thiprtSetLogLevel *hiprtSetLogLevel; @@ -61,15 +63,17 @@ bool hiprtewInit() return result; } -#ifdef _WIN32 initialized = true; if (atexit(hipewHipRtExit)) { return false; } - std::string hiprt_ver(HIPRT_VERSION_STR); - std::string hiprt_path = "hiprt" + hiprt_ver + "64.dll"; +#ifdef _WIN32 + std::string hiprt_path = "hiprt64.dll"; +#else + std::string hiprt_path = "libhiprt64.so"; +#endif hiprt_lib = dynamic_library_open(hiprt_path.c_str()); @@ -89,11 +93,12 @@ bool hiprtewInit() HIPRT_LIBRARY_FIND(hiprtGetSceneBuildTemporaryBufferSize) HIPRT_LIBRARY_FIND(hiprtCreateFuncTable) HIPRT_LIBRARY_FIND(hiprtSetFuncTable) + HIPRT_LIBRARY_FIND(hiprtCreateGlobalStackBuffer) HIPRT_LIBRARY_FIND(hiprtDestroyFuncTable) + HIPRT_LIBRARY_FIND(hiprtDestroyGlobalStackBuffer) HIPRT_LIBRARY_FIND(hiprtSetLogLevel) result = true; -#endif return result; } diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index ea5ceb2b98a..6381c258faa 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1831,10 +1831,9 @@ class 
CyclesPreferences(bpy.types.AddonPreferences): if compute_device_type == 'HIP': import platform - if platform.system() == "Windows": # HIP-RT is currently only supported on Windows - row = layout.row() - row.active = has_rt_api_support['HIP'] - row.prop(self, "use_hiprt") + row = layout.row() + row.active = has_rt_api_support['HIP'] + row.prop(self, "use_hiprt") elif compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu: row = layout.row() diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index 935e5296572..ac04ba85972 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -43,7 +43,7 @@ endif() ########################################################################### if(WITH_CYCLES_DEVICE_HIP) - if(WITH_CYCLES_HIP_BINARIES) + if(WITH_CYCLES_HIP_BINARIES OR WITH_CYCLES_DEVICE_HIPRT) # Need at least HIP 5.5 to solve compiler bug affecting the kernel. find_package(HIP 5.5.0) set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES) @@ -55,6 +55,9 @@ if(WITH_CYCLES_DEVICE_HIP) # HIP RT if(WITH_CYCLES_DEVICE_HIP AND WITH_CYCLES_DEVICE_HIPRT) + if(DEFINED LIBDIR) + set(HIPRT_ROOT_DIR ${LIBDIR}/hiprt) + endif() find_package(HIPRT) set_and_warn_library_found("HIP RT" HIPRT_FOUND WITH_CYCLES_DEVICE_HIPRT) endif() diff --git a/intern/cycles/device/hiprt/device_impl.cpp b/intern/cycles/device/hiprt/device_impl.cpp index bc402453ff4..d26dd680336 100644 --- a/intern/cycles/device/hiprt/device_impl.cpp +++ b/intern/cycles/device/hiprt/device_impl.cpp @@ -59,7 +59,6 @@ BVHLayoutMask HIPRTDevice::get_bvh_layout_mask(const uint /* kernel_features */) HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless) : HIPDevice(info, stats, profiler, headless), - global_stack_buffer(this, "global_stack_buffer", MEM_DEVICE_ONLY), hiprt_context(NULL), scene(NULL), functions_table(NULL), @@ -77,6 +76,7 @@ HIPRTDevice::HIPRTDevice(const 
DeviceInfo &info, Stats &stats, Profiler &profile prim_time_offset(this, "prim_time_offset", MEM_GLOBAL) { HIPContextScope scope(this); + global_stack_buffer = {0}; hiprtContextCreationInput hiprt_context_input = {0}; hiprt_context_input.ctxt = hipContext; hiprt_context_input.device = hipDevice; @@ -90,7 +90,7 @@ HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profile } rt_result = hiprtCreateFuncTable( - hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, &functions_table); + hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, functions_table); if (rt_result != hiprtSuccess) { set_error(string_printf("Failed to create HIPRT Function Table")); @@ -113,7 +113,8 @@ HIPRTDevice::~HIPRTDevice() custom_prim_info.free(); prim_time_offset.free(); prims_time.free(); - global_stack_buffer.free(); + + hiprtDestroyGlobalStackBuffer(hiprt_context, global_stack_buffer); hiprtDestroyFuncTable(hiprt_context, functions_table); hiprtDestroyScene(hiprt_context, scene); hiprtDestroyContext(hiprt_context); @@ -156,12 +157,17 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const string kernel_md5 = util_md5_string(source_md5 + common_cflags); const string include_path = source_path; - const string bitcode_file = string_printf( + const string cycles_bc = string_printf( "cycles_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str()); - const string bitcode = path_cache_get(path_join("kernels", bitcode_file)); + const string cycles_bitcode = path_cache_get(path_join("kernels", cycles_bc)); const string fatbin_file = string_printf( "cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str()); const string fatbin = path_cache_get(path_join("kernels", fatbin_file)); + const string hiprt_bc = string_printf( + "hiprt_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str()); + const string hiprt_bitcode = path_cache_get(path_join("kernels", hiprt_bc)); + + const string hiprt_include_path = 
path_join(source_path, "kernel/device/hiprt"); VLOG(1) << "Testing for locally compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { @@ -210,6 +216,12 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, path_create_directories(fatbin); + string rtc_options; + rtc_options.append(" --offload-arch=").append(arch.c_str()); + rtc_options.append(" -D __HIPRT__"); + rtc_options.append(" -ffast-math -O3 -std=c++17"); + rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm"); + source_path = path_join(path_join(source_path, "kernel"), path_join("device", path_join(base, string_printf("%s.cpp", name)))); @@ -217,25 +229,44 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, double starttime = time_dt(); - const string hiprt_path = getenv("HIPRT_ROOT_DIR"); - // First, app kernels are compiled into bitcode, without access to implementation of HIP RT - // functions - if (!path_exists(bitcode)) { - - std::string rtc_options; - - rtc_options.append(" --offload-arch=").append(arch.c_str()); - rtc_options.append(" -D __HIPRT__"); - rtc_options.append(" -ffast-math -O3 -std=c++17"); - rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm"); + if (!path_exists(cycles_bitcode)) { string command = string_printf("%s %s -I %s -I %s %s -o \"%s\"", hipcc, rtc_options.c_str(), include_path.c_str(), - hiprt_path.c_str(), + hiprt_include_path.c_str(), source_path.c_str(), - bitcode.c_str()); + cycles_bitcode.c_str()); + + printf("Compiling %sHIP kernel ...\n%s\n", + (use_adaptive_compilation()) ? 
"adaptive " : "", + command.c_str()); + +# ifdef _WIN32 + command = "call " + command; +# endif + if (system(command.c_str()) != 0) { + set_error( + "Failed to execute compilation command, " + "see console for details."); + return string(); + } + } + + if (!path_exists(hiprt_bitcode)) { + + rtc_options.append(" -x hip"); + rtc_options.append(" -D HIPRT_BITCODE_LINKING "); + + string source_path = path_join(hiprt_include_path, "/hiprt/impl/hiprt_kernels_bitcode.h"); + + string command = string_printf("%s %s -I %s %s -o \"%s\"", + hipcc, + rtc_options.c_str(), + hiprt_include_path.c_str(), + source_path.c_str(), + hiprt_bitcode.c_str()); printf("Compiling %sHIP kernel ...\n%s\n", (use_adaptive_compilation()) ? "adaptive " : "", @@ -257,13 +288,11 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, string linker_options; linker_options.append(" --offload-arch=").append(arch.c_str()); linker_options.append(" -fgpu-rdc --hip-link --cuda-device-only "); - string hiprt_ver(HIPRT_VERSION_STR); - string hiprt_bc = hiprt_path + "\\dist\\bin\\Release\\hiprt" + hiprt_ver + "_amd_lib_win.bc"; - string linker_command = string_printf("clang++ %s \"%s\" %s -o \"%s\"", + string linker_command = string_printf("clang++ %s \"%s\" \"%s\" -o \"%s\"", linker_options.c_str(), - bitcode.c_str(), - hiprt_bc.c_str(), + cycles_bitcode.c_str(), + hiprt_bitcode.c_str(), fatbin.c_str()); # ifdef _WIN32 @@ -458,7 +487,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh * bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer; geom_input.type = hiprtPrimitiveTypeAABBList; - geom_input.aabbList.primitive = &bvh->custom_prim_aabb; + geom_input.primitive.aabbList = bvh->custom_prim_aabb; geom_input.geomType = Motion_Triangle; } else { @@ -490,7 +519,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh * bvh->vertex_data.host_pointer = 0; geom_input.type = 
hiprtPrimitiveTypeTriangleMesh; - geom_input.triangleMesh.primitive = &(bvh->triangle_mesh); + geom_input.primitive.triangleMesh = bvh->triangle_mesh; } return geom_input; @@ -629,7 +658,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh, Hair *hai bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer; geom_input.type = hiprtPrimitiveTypeAABBList; - geom_input.aabbList.primitive = &bvh->custom_prim_aabb; + geom_input.primitive.aabbList = bvh->custom_prim_aabb; geom_input.geomType = Curve; return geom_input; @@ -732,7 +761,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh, PointClou bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer; geom_input.type = hiprtPrimitiveTypeAABBList; - geom_input.aabbList.primitive = &bvh->custom_prim_aabb; + geom_input.primitive.aabbList = bvh->custom_prim_aabb; geom_input.geomType = Point; return geom_input; @@ -779,13 +808,13 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op size_t blas_scratch_buffer_size = 0; hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize( - hiprt_context, &geom_input, &options, &blas_scratch_buffer_size); + hiprt_context, geom_input, options, blas_scratch_buffer_size); if (rt_err != hiprtSuccess) { set_error(string_printf("Failed to get scratch buffer size for BLAS!")); } - rt_err = hiprtCreateGeometry(hiprt_context, &geom_input, &options, &bvh->hiprt_geom); + rt_err = hiprtCreateGeometry(hiprt_context, geom_input, options, bvh->hiprt_geom); if (rt_err != hiprtSuccess) { set_error(string_printf("Failed to create BLAS!")); @@ -800,8 +829,8 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op } rt_err = hiprtBuildGeometry(hiprt_context, hiprtBuildOperationBuild, - &bvh->geom_input, - &options, + bvh->geom_input, + options, (void *)(scratch_buffer.device_pointer), 0, bvh->hiprt_geom); @@ -951,7 +980,8 @@ hiprtScene 
HIPRTDevice::build_tlas(BVHHIPRT *bvh, user_instance_id[num_instances] = blender_instance_id; prim_visibility[num_instances] = mask; - hiprt_blas_ptr[num_instances] = (uint64_t)hiprt_geom_current; + hiprt_blas_ptr[num_instances].geometry = hiprt_geom_current; + hiprt_blas_ptr[num_instances].type = hiprtInstanceTypeGeometry; num_instances++; } blas_ptr[blender_instance_id] = (uint64_t)hiprt_geom_current; @@ -981,13 +1011,13 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh, } scene_input_ptr.instanceMasks = (void *)prim_visibility.device_pointer; - scene_input_ptr.instanceGeometries = (void *)hiprt_blas_ptr.device_pointer; + scene_input_ptr.instances = (void *)hiprt_blas_ptr.device_pointer; scene_input_ptr.instanceTransformHeaders = (void *)transform_headers.device_pointer; scene_input_ptr.instanceFrames = (void *)instance_transform_matrix.device_pointer; hiprtScene scene = 0; - hiprtError rt_err = hiprtCreateScene(hiprt_context, &scene_input_ptr, &options, &scene); + hiprtError rt_err = hiprtCreateScene(hiprt_context, scene_input_ptr, options, scene); if (rt_err != hiprtSuccess) { set_error(string_printf("Failed to create TLAS")); @@ -995,7 +1025,7 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh, size_t tlas_scratch_buffer_size; rt_err = hiprtGetSceneBuildTemporaryBufferSize( - hiprt_context, &scene_input_ptr, &options, &tlas_scratch_buffer_size); + hiprt_context, scene_input_ptr, options, tlas_scratch_buffer_size); if (rt_err != hiprtSuccess) { set_error(string_printf("Failed to get scratch buffer size for TLAS")); @@ -1008,8 +1038,8 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh, rt_err = hiprtBuildScene(hiprt_context, build_operation, - &scene_input_ptr, - &options, + scene_input_ptr, + options, (void *)scratch_buffer.device_pointer, 0, scene); diff --git a/intern/cycles/device/hiprt/device_impl.h b/intern/cycles/device/hiprt/device_impl.h index e34ac959ee3..0f87791ee0c 100644 --- a/intern/cycles/device/hiprt/device_impl.h +++ 
b/intern/cycles/device/hiprt/device_impl.h @@ -53,7 +53,7 @@ class HIPRTDevice : public HIPDevice { return hiprt_context; } - device_vector global_stack_buffer; + hiprtGlobalStackBuffer global_stack_buffer; protected: enum Filter_Function { Closest = 0, Shadows, Local, Volume, Max_Intersect_Filter_Function }; @@ -111,7 +111,7 @@ class HIPRTDevice : public HIPDevice { * blas_ptr has all the valid pointers and null pointers and blas for any geometry can be * directly retrieved from this array (used in subsurface scattering). */ device_vector user_instance_id; - device_vector hiprt_blas_ptr; + device_vector hiprt_blas_ptr; device_vector blas_ptr; /* custom_prim_info stores custom information for custom primitives for all the primitives in a diff --git a/intern/cycles/device/hiprt/queue.cpp b/intern/cycles/device/hiprt/queue.cpp index a37aafe234c..befb0a263f6 100644 --- a/intern/cycles/device/hiprt/queue.cpp +++ b/intern/cycles/device/hiprt/queue.cpp @@ -34,14 +34,25 @@ bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel, const HIPContextScope scope(hiprt_device_); const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel); - if (!hiprt_device_->global_stack_buffer.device_pointer) { - int max_path = num_concurrent_states(0); - hiprt_device_->global_stack_buffer.alloc(max_path * HIPRT_SHARED_STACK_SIZE * sizeof(int)); - hiprt_device_->global_stack_buffer.zero_to_device(); + if (!hiprt_device_->global_stack_buffer.stackData) { + uint32_t max_path = num_concurrent_states(0); + hiprtGlobalStackBufferInput stack_buffer_input{ + hiprtStackTypeGlobal, hiprtStackEntryTypeInteger, HIPRT_THREAD_STACK_SIZE, max_path}; + + hiprtError rt_result = hiprtCreateGlobalStackBuffer(hiprt_device_->get_hiprt_context(), + stack_buffer_input, + hiprt_device_->global_stack_buffer); + + if (rt_result != hiprtSuccess) { + LOG(ERROR) << "Failed to create hiprt Global Stack Buffer"; + return false; + } } DeviceKernelArguments args_copy = args; - 
args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer); + args_copy.add(DeviceKernelArguments::HIPRT_GLOBAL_STACK, + (void *)(&hiprt_device_->global_stack_buffer), + sizeof(hiprtGlobalStackBuffer)); /* Compute kernel launch parameters. */ const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE; diff --git a/intern/cycles/device/queue.h b/intern/cycles/device/queue.h index 1394ec8d85f..6751efe28c2 100644 --- a/intern/cycles/device/queue.h +++ b/intern/cycles/device/queue.h @@ -28,6 +28,7 @@ struct DeviceKernelArguments { INT32, FLOAT32, KERNEL_FILM_CONVERT, + HIPRT_GLOBAL_STACK, }; static const int MAX_ARGS = 18; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index a5089645a9a..c77c31ea4d8 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -92,6 +92,28 @@ set(SRC_KERNEL_DEVICE_HIPRT_HEADERS device/hiprt/hiprt_kernels.h ) +set(SRC_KERNEL_DEVICE_HIPRT_SDK + hiprt/impl/Aabb.h + hiprt/impl/BvhNode.h + hiprt/impl/Geometry.h + hiprt/impl/hiprt_device_impl.h + hiprt/impl/hiprt_kernels_bitcode.h + hiprt/impl/Instance.h + hiprt/impl/Math.h + hiprt/impl/QrDecomposition.h + hiprt/impl/Quaternion.h + hiprt/impl/Scene.h + hiprt/impl/Transform.h + hiprt/impl/Triangle.h +) + +set(SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS + hiprt/hiprt_common.h + hiprt/hiprt_device.h + hiprt/hiprt_types.h + hiprt/hiprt_vec.h +) + set(SRC_KERNEL_DEVICE_OPTIX_HEADERS device/optix/bvh.h device/optix/compat.h @@ -422,6 +444,21 @@ add_executable(zstd_compress ../cmake/zstd_compress.cpp) target_include_directories(zstd_compress SYSTEM PRIVATE ${ZSTD_INCLUDE_DIRS}) target_link_libraries(zstd_compress ${ZSTD_LIBRARIES} ${PTHREADS_LIBRARIES}) +if(NOT WITH_BLENDER) + # For the Cycles standalone put libraries next to the Cycles application. + set(cycles_kernel_runtime_lib_target_path ${CYCLES_INSTALL_PATH}) +else() + # For Blender put the libraries next to the Blender executable. 
+ # + # Note that the installation path in the delayed_install is relative to the versioned folder, + # which means we need to go one level up. + set(cycles_kernel_runtime_lib_target_path "../") +endif() + +if(UNIX AND NOT APPLE) + set(cycles_kernel_runtime_lib_target_path ${cycles_kernel_runtime_lib_target_path}/lib) +endif() + # CUDA module if(WITH_CYCLES_CUDA_BINARIES) @@ -689,17 +726,13 @@ endif() # HIP RT module -if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES) - set(hiprt_sources device/hiprt/kernel.cpp - ${SRC_KERNEL_HEADERS} - ${SRC_KERNEL_DEVICE_GPU_HEADERS} - ${SRC_KERNEL_DEVICE_HIPRT_HEADERS} - ${SRC_UTIL_HEADERS}) - set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc) - set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb) - set(hiprt_file_compressed ${hiprt_file}.zst) - set(kernel_sources ${hiprt_sources}) - set(hiprt_kernel_src "/device/hiprt/kernel.cpp") +if(WITH_CYCLES_DEVICE_HIPRT) + set(HIPRT_COMPILER_PARALLEL_JOBS 1 CACHE STRING "Number of parallel compiler instances to use for HIP-RT kernels") + mark_as_advanced(HIPRT_COMPILER_PARALLEL_JOBS) + + set(bvh_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd.hipfb) + set(bvh_file_oro ${CMAKE_CURRENT_BINARY_DIR}/oro_compiled_kernels.hipfb) + if(WIN32) set(hiprt_compile_command ${CMAKE_COMMAND}) set(hiprt_compile_flags @@ -713,7 +746,106 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES) foreach(arch ${CYCLES_HIP_BINARIES_ARCH}) list(APPEND target_gpus "--offload-arch=${arch}") endforeach() - set(hiprt_compile_flags + + if(WITH_NANOVDB) + set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_NANOVDB) + endif() + + if(WITH_CYCLES_DEBUG) + set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG) + endif() + + set(hiprt_compile_flags_bvh + ${hiprt_compile_flags} + ${target_gpus} + ${HIP_HIPCC_FLAGS} + -x hip + ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h + ${flags} + -D HIPRT_BITCODE_LINKING + -std=c++17 + -mllvm + 
-amdgpu-early-inline-all=false + -mllvm + -amdgpu-function-calls=true + -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} + --genco + -I ${HIPRT_INCLUDE_DIR} + -Wno-parentheses-equality + -Wno-unused-value + -ffast-math + -o ${bvh_file}) + + set(hiprt_compile_flags_bvh_oro + ${hiprt_compile_flags} + ${target_gpus} + ${HIP_HIPCC_FLAGS} + -x hip + ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h + ${flags} + -D HIPRT_BITCODE_LINKING + -std=c++17 + -mllvm + -amdgpu-early-inline-all=false + -mllvm + -amdgpu-function-calls=true + -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} + --genco + -I ${HIPRT_INCLUDE_DIR}/contrib/Orochi + -include hip/hip_runtime.h + -Wno-parentheses-equality + -Wno-unused-value + -ffast-math + -o ${bvh_file_oro}) + + add_custom_command( + OUTPUT ${bvh_file} + COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh} + DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h) + + add_custom_command( + OUTPUT ${bvh_file_oro} + COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh_oro} + DEPENDS ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h) + + delayed_install("" "${bvh_file}" ${cycles_kernel_runtime_lib_target_path}) + delayed_install("" "${bvh_file_oro}" ${cycles_kernel_runtime_lib_target_path}) + + if(WITH_CYCLES_HIP_BINARIES) + set(hiprt_sources device/hiprt/kernel.cpp + ${SRC_KERNEL_HEADERS} + ${SRC_KERNEL_DEVICE_GPU_HEADERS} + ${SRC_KERNEL_DEVICE_HIPRT_HEADERS} + ${SRC_UTIL_HEADERS}) + + set(cycles_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc) + set(sdk_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd_lib.bc) + set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb) + set(hiprt_file_compressed ${hiprt_file}.zst) + set(kernel_sources ${hiprt_sources}) + set(hiprt_kernel_src "/device/hiprt/kernel.cpp") + + set(hiprt_compile_flags_sdk_bc + ${hiprt_compile_flags} + ${target_gpus} + ${HIP_HIPCC_FLAGS} + ${flags} + -x hip + 
${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h + -D HIPRT_BITCODE_LINKING + -std=c++17 + -fgpu-rdc + -c + --gpu-bundle-output + -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} + -emit-llvm + -I ${HIPRT_INCLUDE_DIR} + -Wno-parentheses-equality + -Wno-unused-value + -ffast-math + -o ${sdk_bitcode_file}) + + set(hiprt_compile_flags_cycles_bc ${hiprt_compile_flags} ${target_gpus} ${HIP_HIPCC_FLAGS} @@ -727,6 +859,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES) -fgpu-rdc -c --gpu-bundle-output + -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} -emit-llvm -I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt @@ -734,45 +867,52 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES) -Wno-parentheses-equality -Wno-unused-value -ffast-math - -o ${bitcode_file}) + -o ${cycles_bitcode_file}) - if(WITH_NANOVDB) - set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_NANOVDB) - endif() - - if(WITH_CYCLES_DEBUG) - set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG) - endif() add_custom_command( - OUTPUT ${bitcode_file} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags} + OUTPUT ${cycles_bitcode_file} + COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_cycles_bc} DEPENDS ${kernel_sources}) + + add_custom_command( + OUTPUT ${sdk_bitcode_file} + COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_sdk_bc} + DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h) + if(WIN32) set(hiprt_link_command ${CMAKE_COMMAND}) set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}" ${HIP_LINKER_EXECUTABLE}) else() - # not implemented yet + set(hiprt_link_command ${HIP_LINKER_EXECUTABLE}) + set(hiprt_link_flags) endif() + set(hiprt_link_flags ${hiprt_link_flags} ${target_gpus} -fgpu-rdc --hip-link --cuda-device-only - ${bitcode_file} - ${HIPRT_BITCODE} + -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} + ${cycles_bitcode_file} + ${sdk_bitcode_file} -o ${hiprt_file}) - add_custom_command( - OUTPUT 
${hiprt_file} - COMMAND ${hiprt_link_command} ${hiprt_link_flags} - DEPENDS ${bitcode_file}) + + add_custom_command( + OUTPUT ${hiprt_file} + COMMAND ${hiprt_link_command} ${hiprt_link_flags} + DEPENDS ${cycles_bitcode_file} ${sdk_bitcode_file}) + add_custom_command( OUTPUT ${hiprt_file_compressed} COMMAND "$<TARGET_FILE:zstd_compress>" ${hiprt_file} ${hiprt_file_compressed} DEPENDS ${hiprt_file}) - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib) - add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib) + + endif() + + add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed} ${bvh_file} ${bvh_file_oro}) cycles_set_solution_folder(cycles_kernel_hiprt) endif() @@ -1151,23 +1291,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI) DEPENDS ${cycles_oneapi_kernel_sources}) endif() - if(NOT WITH_BLENDER) - # For the Cycles standalone put libraries next to the Cycles application. - set(cycles_oneapi_target_path ${CYCLES_INSTALL_PATH}) - else() - # For Blender put the libraries next to the Blender executable. - # - # Note that the installation path in the delayed_install is relative to the versioned folder, - # which means we need to go one level up. 
- set(cycles_oneapi_target_path "../") - endif() - # install dynamic libraries required at runtime - if(WIN32) - delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path}) - elseif(UNIX AND NOT APPLE) - delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path}/lib) - endif() + delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_kernel_runtime_lib_target_path}) add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib}) endif() @@ -1287,6 +1412,10 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt) +if(WITH_CYCLES_DEVICE_HIPRT) + delayed_install(${HIPRT_INCLUDE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt/hiprt) + delayed_install(${HIPRT_INCLUDE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_SDK}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt/hiprt/impl) +endif() delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix) delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal) diff --git a/intern/cycles/kernel/device/hiprt/bvh.h b/intern/cycles/kernel/device/hiprt/bvh.h index b7c818d44a7..d577b2b350f 100644 --- a/intern/cycles/kernel/device/hiprt/bvh.h +++ b/intern/cycles/kernel/device/hiprt/bvh.h @@ -125,7 +125,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg, void 
*local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object)); // we don't need custom intersection functions for SSR # ifdef HIPRT_SHARED_STACK - hiprtGeomTraversalAnyHitCustomStack traversal(local_geom, + hiprtGeomTraversalAnyHitCustomStack traversal((hiprtGeometry)local_geom, ray_hip, stack, hiprtTraversalHintDefault, diff --git a/intern/cycles/kernel/device/hiprt/common.h b/intern/cycles/kernel/device/hiprt/common.h index 743d7b537f0..b1c1754124c 100644 --- a/intern/cycles/kernel/device/hiprt/common.h +++ b/intern/cycles/kernel/device/hiprt/common.h @@ -44,36 +44,38 @@ struct LocalPayload { # if defined(HIPRT_SHARED_STACK) # define GET_TRAVERSAL_STACK() \ - Stack stack(&kg->global_stack_buffer[0], \ - HIPRT_THREAD_STACK_SIZE, \ - kg->shared_stack, \ - HIPRT_SHARED_STACK_SIZE); + Stack stack(kg->global_stack_buffer, kg->shared_stack); \ + Instance_Stack instance_stack; # else # define GET_TRAVERSAL_STACK() # endif # ifdef HIPRT_SHARED_STACK # define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \ - hiprtSceneTraversalAnyHitCustomStack traversal(kernel_data.device_bvh, \ - ray_hip, \ - stack, \ - visibility, \ - hiprtTraversalHintDefault, \ - &payload, \ - kernel_params.FUNCTION_TABLE, \ - RAY_TYPE, \ - RAY_TIME); + hiprtSceneTraversalAnyHitCustomStack traversal( \ + (hiprtScene)kernel_data.device_bvh, \ + ray_hip, \ + stack, \ + instance_stack, \ + visibility, \ + hiprtTraversalHintDefault, \ + &payload, \ + kernel_params.FUNCTION_TABLE, \ + RAY_TYPE, \ + RAY_TIME); # define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \ - hiprtSceneTraversalClosestCustomStack traversal(kernel_data.device_bvh, \ - ray_hip, \ - stack, \ - visibility, \ - hiprtTraversalHintDefault, \ - &payload, \ - kernel_params.FUNCTION_TABLE, \ - RAY_TYPE, \ - RAY_TIME); + hiprtSceneTraversalClosestCustomStack traversal( \ + (hiprtScene)kernel_data.device_bvh, \ + ray_hip, \ + stack, \ + instance_stack, \ + visibility, \ + hiprtTraversalHintDefault, \ + 
&payload, \ + kernel_params.FUNCTION_TABLE, \ + RAY_TYPE, \ + RAY_TIME); # else # define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \ hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \ @@ -654,14 +656,14 @@ ccl_device_inline bool volume_intersection_filter(const hiprtRay &ray, return false; } -HIPRT_DEVICE bool intersectFunc(u32 geomType, - u32 rayType, +HIPRT_DEVICE bool intersectFunc(uint geomType, + uint rayType, const hiprtFuncTableHeader &tableHeader, const hiprtRay &ray, void *payload, hiprtHit &hit) { - const u32 index = tableHeader.numGeomTypes * rayType + geomType; + const uint index = tableHeader.numGeomTypes * rayType + geomType; const void *data = tableHeader.funcDataSets[index].filterFuncData; switch (index) { case Curve_Intersect_Function: @@ -683,14 +685,14 @@ HIPRT_DEVICE bool intersectFunc(u32 geomType, return false; } -HIPRT_DEVICE bool filterFunc(u32 geomType, - u32 rayType, +HIPRT_DEVICE bool filterFunc(uint geomType, + uint rayType, const hiprtFuncTableHeader &tableHeader, const hiprtRay &ray, void *payload, const hiprtHit &hit) { - const u32 index = tableHeader.numGeomTypes * rayType + geomType; + const uint index = tableHeader.numGeomTypes * rayType + geomType; const void *data = tableHeader.funcDataSets[index].intersectFuncData; switch (index) { case Triangle_Filter_Closest: diff --git a/intern/cycles/kernel/device/hiprt/globals.h b/intern/cycles/kernel/device/hiprt/globals.h index b2c7812d27a..fbb053c2e3a 100644 --- a/intern/cycles/kernel/device/hiprt/globals.h +++ b/intern/cycles/kernel/device/hiprt/globals.h @@ -31,9 +31,9 @@ CCL_NAMESPACE_BEGIN struct KernelGlobalsGPU { - int *global_stack_buffer; + hiprtGlobalStackBuffer global_stack_buffer; #ifdef HIPRT_SHARED_STACK - int *shared_stack; + hiprtSharedStackBuffer shared_stack; #endif }; @@ -47,7 +47,8 @@ typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals; ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \ ccl_global KernelGlobalsGPU 
kg_gpu; \ KernelGlobals kg = &kg_gpu; \ - kg->shared_stack = &shared_stack[0]; \ + kg->shared_stack.stackData = &shared_stack[0]; \ + kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \ kg->global_stack_buffer = stack_buffer; #else # define HIPRT_INIT_KERNEL_GLOBAL() \ @@ -146,6 +147,7 @@ __constant__ KernelParamsHIPRT kernel_params; # ifdef HIPRT_SHARED_STACK typedef hiprtGlobalStack Stack; +typedef hiprtEmptyInstanceStack Instance_Stack; # endif #endif diff --git a/intern/cycles/kernel/device/hiprt/hiprt_kernels.h b/intern/cycles/kernel/device/hiprt/hiprt_kernels.h index 815ead7f96d..9bbfb290592 100644 --- a/intern/cycles/kernel/device/hiprt/hiprt_kernels.h +++ b/intern/cycles/kernel/device/hiprt/hiprt_kernels.h @@ -9,7 +9,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) ccl_global const int *path_index_array, ccl_global float *render_buffer, const int work_size, - ccl_global int *stack_buffer) + ccl_global hiprtGlobalStackBuffer stack_buffer) { const int global_index = ccl_gpu_global_id_x(); @@ -25,7 +25,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) ccl_gpu_kernel_signature(integrator_intersect_shadow, ccl_global const int *path_index_array, const int work_size, - ccl_global int *stack_buffer) + ccl_global hiprtGlobalStackBuffer stack_buffer) { const int global_index = ccl_gpu_global_id_x(); @@ -41,7 +41,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) ccl_gpu_kernel_signature(integrator_intersect_subsurface, ccl_global const int *path_index_array, const int work_size, - ccl_global int *stack_buffer) + ccl_global hiprtGlobalStackBuffer stack_buffer) { const int global_index = ccl_gpu_global_id_x(); @@ -57,7 +57,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) ccl_gpu_kernel_signature(integrator_intersect_volume_stack, ccl_global const int *path_index_array, const int work_size, - ccl_global int *stack_buffer) + ccl_global hiprtGlobalStackBuffer stack_buffer) { const int global_index = 
ccl_gpu_global_id_x(); @@ -72,7 +72,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) ccl_gpu_kernel_signature(integrator_intersect_dedicated_light, ccl_global const int *path_index_array, const int work_size, - ccl_global int *stack_buffer) + ccl_global hiprtGlobalStackBuffer stack_buffer) { const int global_index = ccl_gpu_global_id_x(); @@ -89,7 +89,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) ccl_global const int *path_index_array, ccl_global float *render_buffer, const int work_size, - ccl_global int *stack_buffer) + ccl_global hiprtGlobalStackBuffer stack_buffer) { const int global_index = ccl_gpu_global_id_x(); if (global_index < work_size) { @@ -104,7 +104,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS) ccl_global const int *path_index_array, ccl_global float *render_buffer, const int work_size, - ccl_global int *stack_buffer) + ccl_global hiprtGlobalStackBuffer stack_buffer) { const int global_index = ccl_gpu_global_id_x(); if (global_index < work_size) { diff --git a/lib/linux_x64 b/lib/linux_x64 index 15d135d1014..2b125e847c5 160000 --- a/lib/linux_x64 +++ b/lib/linux_x64 @@ -1 +1 @@ -Subproject commit 15d135d1014c9cc0519b1d0eaadaa9763295312c +Subproject commit 2b125e847c545780740d6259370e385a16913301 diff --git a/lib/windows_x64 b/lib/windows_x64 index bb7ae0e1073..efa049df4c3 160000 --- a/lib/windows_x64 +++ b/lib/windows_x64 @@ -1 +1 @@ -Subproject commit bb7ae0e107391705e20dc424ffc53edeea4d51de +Subproject commit efa049df4c3155090f810bc5a68c00786f922cf6 diff --git a/source/creator/CMakeLists.txt b/source/creator/CMakeLists.txt index 3516cfd8c73..a20af5fec3a 100644 --- a/source/creator/CMakeLists.txt +++ b/source/creator/CMakeLists.txt @@ -1899,6 +1899,17 @@ if(WIN32) endforeach() endif() +if(WIN32) + if(WITH_CYCLES_DEVICE_HIPRT) + if(EXISTS ${LIBDIR}/hiprt/bin/hiprt64.dll) + install( + FILES ${LIBDIR}/hiprt/bin/hiprt64.dll + DESTINATION "./" + ) + endif() + endif() +endif() + # `vcpkg` substitutes our 
libraries with theirs, which will cause issues when you run # these builds on other systems due to missing DLLs. So we opt out of using `vcpkg`. if(WIN32)