From 6363181af9dc418c97a633ac404a1285cd0b8b03 Mon Sep 17 00:00:00 2001 From: "Sahar A. Kashi" Date: Thu, 20 Feb 2025 17:34:14 +0100 Subject: [PATCH] Cycles: HIP-RT 2.5 integration and gfx12 support This change brings the following improvements on the user level - Support of GPUs with gfx12 architecture - New HIP-RT library which in addition to the gfx12 support brings various bug-fixes. The known limitation of gfx12 is that OpenImageDenoiser does not yet support this GPU architecture. This means that while Cycles will use the full advantage of the gfx12 (including hardware accelerated ray-tracing), denoising will only be possible on CPU, or secondary gfx11 or below GPU. This is something that requires a change in OIDN and it is to late to do it for Blender 4.4, but it is something to look forward for Blender 4.5. The gfx12 changes for the pre-compiled kernels is rather trivial, so it comes together (in the same PR) as the bigger HIP-RT change. On the development side this change brings the following improvements: - One step compile and link (much simpler CMake rules) - Embedding BVH binaries in hiprt dll (which makes it easier to package and load, without relying on special path configuration) Co-authored-by: Sahar Kashi Co-authored-by: Sergey Sharybin Co-authored-by: Brecht Van Lommel Pull Request: https://projects.blender.org/blender/blender/pulls/133129 --- CMakeLists.txt | 3 +- build_files/build_environment/CMakeLists.txt | 5 +- .../cmake/check_compilers.cmake | 9 +- .../build_environment/cmake/hiprt.cmake | 42 ++-- .../build_environment/cmake/versions.cmake | 12 +- .../linux/linux_rocky8_setup.sh | 24 ++- .../build_environment/patches/hiprt.diff | 35 ++++ build_files/cmake/Modules/FindHIP.cmake | 71 ++----- build_files/cmake/Modules/FindHIPRT.cmake | 2 +- build_files/config/pipeline_config.yaml | 4 +- extern/hipew/include/hiprtew.h | 10 +- intern/cycles/cmake/external_libs.cmake | 2 +- intern/cycles/device/hiprt/device_impl.cpp | 151 +++++--------- intern/cycles/kernel/CMakeLists.txt | 185 ++++-------------- intern/cycles/kernel/device/hiprt/bvh.h | 3 + intern/cycles/kernel/device/hiprt/kernel.cpp | 2 +- lib/linux_x64 | 2 +- lib/windows_x64 | 2 +- 18 files changed, 207 insertions(+), 357 deletions(-) create mode 100644 build_files/build_environment/patches/hiprt.diff diff --git a/CMakeLists.txt b/CMakeLists.txt index 0c3328507ef..f9fec79a755 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -715,7 +715,8 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")) set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1036 - gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 + gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 + gfx1200 gfx1201 CACHE STRING "AMD HIP architectures to build binaries for" ) mark_as_advanced(WITH_CYCLES_DEVICE_HIP) diff --git a/build_files/build_environment/CMakeLists.txt b/build_files/build_environment/CMakeLists.txt index 4603e9d49a6..295b15eabea 100644 --- a/build_files/build_environment/CMakeLists.txt +++ b/build_files/build_environment/CMakeLists.txt @@ -90,10 +90,9 @@ include(cmake/harfbuzz.cmake) if(NOT APPLE) include(cmake/xr_openxr.cmake) if(NOT BLENDER_PLATFORM_ARM) - if(HIP_FOUND) + # HIP-RT requires special SDK which is not widely available yet. + if(NOT WIN32) include(cmake/hiprt.cmake) - else() - message(STATUS "Missing HIP compiler, skipping HIPRT build") endif() include(cmake/dpcpp.cmake) include(cmake/dpcpp_deps.cmake) diff --git a/build_files/build_environment/cmake/check_compilers.cmake b/build_files/build_environment/cmake/check_compilers.cmake index 1ba96825fdc..67fdf4c7ff3 100644 --- a/build_files/build_environment/cmake/check_compilers.cmake +++ b/build_files/build_environment/cmake/check_compilers.cmake @@ -6,14 +6,14 @@ message(STATUS "Found C Compiler: ${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") if(UNIX AND NOT APPLE) - if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_C_COMPILER_VERSION MATCHES ${RELEASE_GCC_VERSION})) + if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_C_COMPILER_VERSION MATCHES "${RELEASE_GCC_VERSION}.*")) message(STATUS " NOTE: Official releases uses GCC ${RELEASE_GCC_VERSION}") endif() endif() message(STATUS "Found C++ Compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") if(UNIX AND NOT APPLE) - if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_CXX_COMPILER_VERSION MATCHES ${RELEASE_GCC_VERSION})) + if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_CXX_COMPILER_VERSION MATCHES "${RELEASE_GCC_VERSION}.*")) message(STATUS " NOTE: Official releases uses GCC ${RELEASE_GCC_VERSION}") endif() endif() @@ -24,19 +24,18 @@ if(NOT APPLE) message(STATUS "Missing CUDA compiler") else() message(STATUS "Found CUDA Compiler: ${CUDAToolkit_NVCC_EXECUTABLE} ${CUDAToolkit_VERSION}") - if(NOT CUDAToolkit_VERSION MATCHES ${RELEASE_CUDA_VERSION}) + if(NOT CUDAToolkit_VERSION MATCHES "${RELEASE_CUDA_VERSION}.*") message(STATUS " NOTE: Official releases uses CUDA ${RELEASE_CUDA_VERSION}") endif() endif() - unset(HIP_VERSION) find_package(HIP QUIET) if(NOT HIP_FOUND) message(STATUS "Missing HIP compiler") else() message(STATUS "Found HIP Compiler: ${HIP_HIPCC_EXECUTABLE} ${HIP_VERSION}") - if(NOT HIP_VERSION MATCHES ${RELEASE_HIP_VERSION}) + if(NOT HIP_VERSION MATCHES "${RELEASE_HIP_VERSION}.*") message(STATUS " NOTE: Official releases uses HIP ${RELEASE_HIP_VERSION}") endif() endif() diff --git a/build_files/build_environment/cmake/hiprt.cmake b/build_files/build_environment/cmake/hiprt.cmake index 618e83bfe19..d64310072ea 100644 --- a/build_files/build_environment/cmake/hiprt.cmake +++ b/build_files/build_environment/cmake/hiprt.cmake @@ -2,32 +2,34 @@ # # SPDX-License-Identifier: GPL-2.0-or-later +if(NOT HIP_FOUND) + message(STATUS "Missing HIP compiler, skipping HIPRT build") + return() +endif() + +if(NOT HIP_VERSION MATCHES "${RELEASE_HIP_VERSION}.*") + message(STATUS "Wrong HIP compiler version (expected ${RELEASE_HIP_VERSION}), skipping HIPRT build") + return() +endif() + # Note the utility apps may use png/tiff/gif system libraries, but the # library itself does not depend on them, so should give no problems. -get_filename_component(_hip_path ${HIP_HIPCC_EXECUTABLE} DIRECTORY) -get_filename_component(_hip_path ${_hip_path} DIRECTORY) +get_filename_component(_hip_bin_path ${HIP_HIPCC_EXECUTABLE} DIRECTORY) +get_filename_component(_hip_path ${_hip_bin_path} DIRECTORY) set(HIPRT_EXTRA_ARGS -DCMAKE_BUILD_TYPE=Release -DHIP_PATH=${_hip_path} - -DBITCODE=ON + -DBITCODE=OFF -DGENERATE_BAKE_KERNEL=OFF -DNO_UNITTEST=ON + -DBAKE_COMPILED_KERNEL=ON + -DPRECOMPILE=ON + -DPYTHON_EXECUTABLE=${PYTHON_BINARY} + -DFORCE_DISABLE_CUDA=ON ) -if(WIN32) - # Windows is currently defaulting to HIP 5 for the buildbot and the - # dependency build environment. - list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=ON) -else() - # The Linux uses HIP 6 by default in those environments, but it had - # -DHIPRT_PREFER_HIP_5=ON passed to the dependency builder in its - # initial implementation. Force it to off so that incremental build - # in the existing build environment does the right thing. - list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=OFF) -endif() - set(HIPRT_SOURCE_DIR ${BUILD_DIR}/hiprt/src/external_hiprt) set(HIPRT_BUILD_DIR ${BUILD_DIR}/hiprt/src/external_hiprt-build) @@ -38,6 +40,10 @@ ExternalProject_Add(external_hiprt CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR} PREFIX ${BUILD_DIR}/hiprt + PATCH_COMMAND ${PATCH_CMD} -p 1 -d + ${BUILD_DIR}/hiprt/src/external_hiprt < + ${PATCH_DIR}/hiprt.diff + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/hiprt ${HIPRT_EXTRA_ARGS} @@ -45,6 +51,11 @@ ExternalProject_Add(external_hiprt INSTALL_DIR ${LIBDIR}/hiprt ) +add_dependencies( + external_hiprt + external_python +) + if(WIN32) # Strip version from shared library name. ExternalProject_Add_Step(external_hiprt after_install @@ -68,4 +79,5 @@ else() ) harvest(external_hiprt hiprt/include hiprt/include "*.h") harvest(external_hiprt hiprt/bin hiprt/lib "*${SHAREDLIBEXT}*") + harvest(external_hiprt hiprt/bin hiprt/lib "*.hipfb") endif() diff --git a/build_files/build_environment/cmake/versions.cmake b/build_files/build_environment/cmake/versions.cmake index f5e2b765496..563ccbc1d9a 100644 --- a/build_files/build_environment/cmake/versions.cmake +++ b/build_files/build_environment/cmake/versions.cmake @@ -8,9 +8,9 @@ # For anyone making their own library build, matching these exactly is not # needed but it can be a useful reference. -set(RELEASE_GCC_VERSION 11.2.*) -set(RELEASE_CUDA_VERSION 12.8.*) -set(RELEASE_HIP_VERSION 5.7.*) +set(RELEASE_GCC_VERSION 11.2) +set(RELEASE_CUDA_VERSION 12.8) +set(RELEASE_HIP_VERSION 6.3) # Libraries # @@ -1371,10 +1371,10 @@ set(PYBIND11_HOMEPAGE https://github.com/pybind/pybind11) set(PYBIND11_LICENSE SPDX:BSD-2-Clause) set(PYBIND11_COPYRIGHT "Copyright (c) 2016 Wenzel Jakob , All rights reserved.") -set(HIPRT_VERSION 83e18cc9c3de8f2f9c48b663cf3189361e891054) -set(HIPRT_LIBRARY_VERSION 02003) +set(HIPRT_VERSION c4298933fe046e3f915977b5a69537a59aac8f47) +set(HIPRT_LIBRARY_VERSION 02005) set(HIPRT_URI https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT/archive/${HIPRT_VERSION}.tar.gz) -set(HIPRT_HASH b5639fa06bea45eff98bea2929516f7c) +set(HIPRT_HASH 65b8a975d23db04d908c758f3fb7bae3) set(HIPRT_HASH_TYPE MD5) set(HIPRT_FILE hiprt-${HIPRT_VERSION}.tar.gz) set(HIPRT_HOMEPAGE https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT) diff --git a/build_files/build_environment/linux/linux_rocky8_setup.sh b/build_files/build_environment/linux/linux_rocky8_setup.sh index 831bfd4b453..8031d0dfff2 100644 --- a/build_files/build_environment/linux/linux_rocky8_setup.sh +++ b/build_files/build_environment/linux/linux_rocky8_setup.sh @@ -200,15 +200,27 @@ yum -y install jack-audio-connection-kit-devel # - "Install kernel driver". # Register ROCm packages -rm -f /etc/yum.repos.d/rocm.repo -tee --append /etc/yum.repos.d/rocm.repo < #define HIPRT_MAJOR_VERSION 2 -#define HIPRT_MINOR_VERSION 3 -#define HIPRT_PATCH_VERSION 0x7df94af +#define HIPRT_MINOR_VERSION 5 +#define HIPRT_PATCH_VERSION 0x0201c59 -#define HIPRT_API_VERSION 2003 -#define HIPRT_VERSION_STR "02003" -#define HIP_VERSION_STR "6.0" +#define HIPRT_API_VERSION 2005 +#define HIPRT_VERSION_STR "02005" +#define HIP_VERSION_STR "6.3" #ifdef _WIN32 #define HIPRTAPI __stdcall diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index ac04ba85972..aa9dbd151fd 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -43,7 +43,7 @@ endif() ########################################################################### if(WITH_CYCLES_DEVICE_HIP) - if(WITH_CYCLES_HIP_BINARIES OR WITH_CYCLES_DEVICE_HIPRT) + if(WITH_CYCLES_HIP_BINARIES) # Need at least HIP 5.5 to solve compiler bug affecting the kernel. find_package(HIP 5.5.0) set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES) diff --git a/intern/cycles/device/hiprt/device_impl.cpp b/intern/cycles/device/hiprt/device_impl.cpp index 455cc792a38..a44e9cdfdeb 100644 --- a/intern/cycles/device/hiprt/device_impl.cpp +++ b/intern/cycles/device/hiprt/device_impl.cpp @@ -145,7 +145,7 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const std::string arch = hipDeviceArch(hipDevId); if (!use_adaptive_compilation()) { - const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb.zst", name)); + const string fatbin = path_get(string_printf("lib/%s_rt_%s.hipfb.zst", name, arch.c_str())); VLOG(1) << "Testing for pre-compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { VLOG(1) << "Using precompiled kernel."; @@ -160,16 +160,9 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const string kernel_md5 = util_md5_string(source_md5 + common_cflags); const string include_path = source_path; - const string cycles_bc = string_printf( - "cycles_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str()); - const string cycles_bitcode = path_cache_get(path_join("kernels", cycles_bc)); const string fatbin_file = string_printf( "cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str()); const string fatbin = path_cache_get(path_join("kernels", fatbin_file)); - const string hiprt_bc = string_printf( - "hiprt_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str()); - const string hiprt_bitcode = path_cache_get(path_join("kernels", hiprt_bc)); - const string hiprt_include_path = path_join(source_path, "kernel/device/hiprt"); VLOG(1) << "Testing for locally compiled kernel " << fatbin << "."; @@ -219,89 +212,35 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, path_create_directories(fatbin); - string rtc_options; - rtc_options.append(" --offload-arch=").append(arch); - rtc_options.append(" -D __HIPRT__"); - rtc_options.append(" -ffast-math -O3 -std=c++17"); - rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm"); - source_path = path_join(path_join(source_path, "kernel"), path_join("device", path_join(base, string_printf("%s.cpp", name)))); + const char *const kernel_ext = "genco"; + string options; + options.append( + "-Wno-parentheses-equality -Wno-unused-value -ffast-math -O3 -std=c++17 -D __HIPRT__"); + options.append(" --offload-arch=").append(arch.c_str()); +# ifdef WITH_NANOVDB + options.append(" -D WITH_NANOVDB"); +# endif + printf("Compiling %s and caching to %s", source_path.c_str(), fatbin.c_str()); double starttime = time_dt(); - if (!path_exists(cycles_bitcode)) { - - string command = string_printf("%s %s -I %s -I %s %s -o \"%s\"", - hipcc, - rtc_options.c_str(), - include_path.c_str(), - hiprt_include_path.c_str(), - source_path.c_str(), - cycles_bitcode.c_str()); - - printf("Compiling %sHIP kernel ...\n%s\n", - (use_adaptive_compilation()) ? "adaptive " : "", - command.c_str()); + string compile_command = string_printf("%s %s -I %s -I %s --%s %s -o \"%s\"", + hipcc, + options.c_str(), + include_path.c_str(), + hiprt_include_path.c_str(), + kernel_ext, + source_path.c_str(), + fatbin.c_str()); # ifdef _WIN32 - command = "call " + command; + compile_command = "call " + compile_command; # endif - if (system(command.c_str()) != 0) { - set_error( - "Failed to execute compilation command, " - "see console for details."); - return string(); - } - } - - if (!path_exists(hiprt_bitcode)) { - - rtc_options.append(" -x hip"); - rtc_options.append(" -D HIPRT_BITCODE_LINKING "); - - string source_path = path_join(hiprt_include_path, "/hiprt/impl/hiprt_kernels_bitcode.h"); - - string command = string_printf("%s %s -I %s %s -o \"%s\"", - hipcc, - rtc_options.c_str(), - hiprt_include_path.c_str(), - source_path.c_str(), - hiprt_bitcode.c_str()); - - printf("Compiling %sHIP kernel ...\n%s\n", - (use_adaptive_compilation()) ? "adaptive " : "", - command.c_str()); - -# ifdef _WIN32 - command = "call " + command; -# endif - if (system(command.c_str()) != 0) { - set_error( - "Failed to execute compilation command, " - "see console for details."); - return string(); - } - } - - // After compilation, the bitcode produced is linked with HIP RT bitcode (containing - // implementations of HIP RT functions, e.g. traversal, to produce the final executable code - string linker_options; - linker_options.append(" --offload-arch=").append(arch); - linker_options.append(" -fgpu-rdc --hip-link --cuda-device-only "); - - string linker_command = string_printf("clang++ %s \"%s\" \"%s\" -o \"%s\"", - linker_options.c_str(), - cycles_bitcode.c_str(), - hiprt_bitcode.c_str(), - fatbin.c_str()); - -# ifdef _WIN32 - linker_command = "call " + linker_command; -# endif - if (system(linker_command.c_str()) != 0) { + if (system(compile_command.c_str()) != 0) { set_error( "Failed to execute linking command, " "see console for details."); @@ -831,14 +770,19 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op if (rt_err != hiprtSuccess) { set_error(string_printf("Failed to create BLAS!")); } - bvh->geom_input = geom_input; { thread_scoped_lock lock(hiprt_mutex); if (blas_scratch_buffer_size > scratch_buffer_size) { scratch_buffer.alloc(blas_scratch_buffer_size); - scratch_buffer_size = blas_scratch_buffer_size; scratch_buffer.zero_to_device(); + if (!scratch_buffer.device_pointer) { + hiprtDestroyGeometry(hiprt_context, bvh->hiprt_geom); + bvh->hiprt_geom = nullptr; + return; + } + scratch_buffer_size = blas_scratch_buffer_size; } + bvh->geom_input = geom_input; rt_err = hiprtBuildGeometry(hiprt_context, hiprtBuildOperationBuild, bvh->geom_input, @@ -1003,6 +947,28 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh, blender_instance_id++; } + size_t table_ptr_size = 0; + hipDeviceptr_t table_device_ptr; + + hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule, "kernel_params")); + + size_t kernel_param_offset[4]; + int table_index = 0; + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_closest_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_shadow_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_local_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_volume_intersect); + + for (int index = 0; index < table_index; index++) { + + hip_assert(hipMemcpyHtoD(table_device_ptr + kernel_param_offset[index], + (void *)&functions_table, + sizeof(device_ptr))); + } + + if (num_instances == 0) + return nullptr; + int frame_count = transform_matrix.size(); hiprtSceneBuildInput scene_input_ptr = {nullptr}; scene_input_ptr.instanceCount = num_instances; @@ -1109,25 +1075,6 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh, prim_time_offset.copy_to_device(); } - size_t table_ptr_size = 0; - hipDeviceptr_t table_device_ptr; - - hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule, "kernel_params")); - - size_t kernel_param_offset[4]; - int table_index = 0; - kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_closest_intersect); - kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_shadow_intersect); - kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_local_intersect); - kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_volume_intersect); - - for (int index = 0; index < table_index; index++) { - - hip_assert(hipMemcpyHtoD(table_device_ptr + kernel_param_offset[index], - (void *)&functions_table, - sizeof(device_ptr))); - } - return scene; } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 25dd081e620..81c7e499b33 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -97,7 +97,6 @@ set(SRC_KERNEL_DEVICE_HIPRT_SDK hiprt/impl/hiprt_device_impl.h hiprt/impl/hiprt_kernels_bitcode.h hiprt/impl/Instance.h - hiprt/impl/Math.h hiprt/impl/QrDecomposition.h hiprt/impl/Quaternion.h hiprt/impl/Scene.h @@ -110,6 +109,7 @@ set(SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS hiprt/hiprt_device.h hiprt/hiprt_types.h hiprt/hiprt_vec.h + hiprt/hiprt_math.h ) set(SRC_KERNEL_DEVICE_OPTIX_HEADERS @@ -666,7 +666,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) set(hip_command ${CMAKE_COMMAND}) set(hip_flags -E env "HIP_PATH=${HIP_ROOT_DIR}" - ${HIP_HIPCC_EXECUTABLE}.bat) + ${HIP_HIPCC_EXECUTABLE}) else() set(hip_command ${HIP_HIPCC_EXECUTABLE}) set(hip_flags) @@ -734,26 +734,19 @@ endif() # HIP RT module -if(WITH_CYCLES_DEVICE_HIPRT) +if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIPRT) set(HIPRT_COMPILER_PARALLEL_JOBS 1 CACHE STRING "Number of parallel compiler instances to use for HIP-RT kernels") mark_as_advanced(HIPRT_COMPILER_PARALLEL_JOBS) - set(bvh_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd.hipfb) - set(bvh_file_oro ${CMAKE_CURRENT_BINARY_DIR}/oro_compiled_kernels.hipfb) - if(WIN32) set(hiprt_compile_command ${CMAKE_COMMAND}) set(hiprt_compile_flags -E env "HIP_PATH=${HIP_ROOT_DIR}" - ${HIP_HIPCC_EXECUTABLE}.bat) + ${HIP_HIPCC_EXECUTABLE}) else() set(hiprt_compile_command ${HIP_HIPCC_EXECUTABLE}) set(hiprt_compile_flags) endif() - set(target_gpus) - foreach(arch ${CYCLES_HIP_BINARIES_ARCH}) - list(APPEND target_gpus "--offload-arch=${arch}") - endforeach() if(WITH_NANOVDB) set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_NANOVDB) @@ -767,14 +760,23 @@ if(WITH_CYCLES_DEVICE_HIPRT) message(WARNING "HIP SDK ${HIP_VERSION} has known rendering artifacts with HIPRT. 5.7 is recommended instead") endif() - set(hiprt_compile_flags_bvh + set(hiprt_sources device/hiprt/kernel.cpp + ${SRC_KERNEL_HEADERS} + ${SRC_KERNEL_DEVICE_GPU_HEADERS} + ${SRC_KERNEL_DEVICE_HIPRT_HEADERS} + ${SRC_UTIL_HEADERS} + ) + + set(kernel_sources ${hiprt_sources}) + set(hiprt_kernel_src "/device/hiprt/kernel.cpp") + set(hiprt_compile_flags ${hiprt_compile_flags} - ${target_gpus} ${HIP_HIPCC_FLAGS} - -x hip - ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h - ${flags} - -D HIPRT_BITCODE_LINKING + ${CMAKE_CURRENT_SOURCE_DIR}${hiprt_kernel_src} + -D CCL_NAMESPACE_BEGIN= + -D CCL_NAMESPACE_END= + -D HIPCC + -D __HIPRT__ -std=c++17 -mllvm -amdgpu-early-inline-all=false @@ -782,157 +784,38 @@ if(WITH_CYCLES_DEVICE_HIPRT) -amdgpu-function-calls=true -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} --genco + -I ${CMAKE_CURRENT_SOURCE_DIR}/.. + -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt -I ${HIPRT_INCLUDE_DIR} -Wno-parentheses-equality -Wno-unused-value -ffast-math - -o ${bvh_file}) + ) - set(hiprt_compile_flags_bvh_oro - ${hiprt_compile_flags} - ${target_gpus} - ${HIP_HIPCC_FLAGS} - -x hip - ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h - ${flags} - -D HIPRT_BITCODE_LINKING - -std=c++17 - -mllvm - -amdgpu-early-inline-all=false - -mllvm - -amdgpu-function-calls=true - -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} - --genco - -I ${HIPRT_INCLUDE_DIR}/contrib/Orochi - -include hip/hip_runtime.h - -Wno-parentheses-equality - -Wno-unused-value - -ffast-math - -o ${bvh_file_oro}) - - add_custom_command( - OUTPUT ${bvh_file} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh} - DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h) - - add_custom_command( - OUTPUT ${bvh_file_oro} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh_oro} - DEPENDS ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h) - - delayed_install("" "${bvh_file}" ${cycles_kernel_runtime_lib_target_path}) - delayed_install("" "${bvh_file_oro}" ${cycles_kernel_runtime_lib_target_path}) - - if(WITH_CYCLES_HIP_BINARIES) - set(hiprt_sources device/hiprt/kernel.cpp - ${SRC_KERNEL_HEADERS} - ${SRC_KERNEL_DEVICE_GPU_HEADERS} - ${SRC_KERNEL_DEVICE_HIPRT_HEADERS} - ${SRC_UTIL_HEADERS} - ) - - set(cycles_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc) - set(sdk_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd_lib.bc) - set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb) + set(hiprt_hipfb) + foreach(arch ${CYCLES_HIP_BINARIES_ARCH}) + set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_${arch}.hipfb) set(hiprt_file_compressed ${hiprt_file}.zst) - set(kernel_sources ${hiprt_sources}) - set(hiprt_kernel_src "/device/hiprt/kernel.cpp") - - set(hiprt_compile_flags_sdk_bc - ${hiprt_compile_flags} - ${target_gpus} - ${HIP_HIPCC_FLAGS} - ${flags} - -x hip - ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h - -D HIPRT_BITCODE_LINKING - -std=c++17 - -fgpu-rdc - -c - --gpu-bundle-output - -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} - -emit-llvm - -I ${HIPRT_INCLUDE_DIR} - -Wno-parentheses-equality - -Wno-unused-value - -ffast-math - -o ${sdk_bitcode_file}) - - set(hiprt_compile_flags_cycles_bc - ${hiprt_compile_flags} - ${target_gpus} - ${HIP_HIPCC_FLAGS} - ${CMAKE_CURRENT_SOURCE_DIR}${hiprt_kernel_src} - ${flags} - -D CCL_NAMESPACE_BEGIN= - -D CCL_NAMESPACE_END= - -D HIPCC - -D __HIPRT__ - -std=c++17 - -fgpu-rdc - -c - --gpu-bundle-output - -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} - -emit-llvm - -I ${CMAKE_CURRENT_SOURCE_DIR}/.. - -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt - -I ${HIPRT_INCLUDE_DIR} - -Wno-parentheses-equality - -Wno-unused-value - -ffast-math - -o ${cycles_bitcode_file} - ) - - add_custom_command( - OUTPUT ${cycles_bitcode_file} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_cycles_bc} - DEPENDS ${kernel_sources} - ) - - add_custom_command( - OUTPUT ${sdk_bitcode_file} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_sdk_bc} - DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h - ) - - if(WIN32) - set(hiprt_link_command ${CMAKE_COMMAND}) - set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}" - ${HIP_LINKER_EXECUTABLE} - ) - else() - set(hiprt_link_command ${HIP_LINKER_EXECUTABLE}) - set(hiprt_link_flags) - endif() - - set(hiprt_link_flags - ${hiprt_link_flags} - ${target_gpus} - -fgpu-rdc - --hip-link - --cuda-device-only - -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} - ${cycles_bitcode_file} - ${sdk_bitcode_file} - -o ${hiprt_file} - ) + set(hiprt_flags + ${hiprt_compile_flags} + --offload-arch=${arch} + -o ${hiprt_file}) add_custom_command( OUTPUT ${hiprt_file} - COMMAND ${hiprt_link_command} ${hiprt_link_flags} - DEPENDS ${cycles_bitcode_file} ${sdk_bitcode_file} + COMMAND ${hiprt_compile_command} ${hiprt_flags} + DEPENDS ${kernel_sources} ) - add_custom_command( OUTPUT ${hiprt_file_compressed} COMMAND "$" ${hiprt_file} ${hiprt_file_compressed} DEPENDS ${hiprt_file} ) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND hiprt_hipfb ${hiprt_file_compressed}) + endforeach() - endif() - - add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed} ${bvh_file} ${bvh_file_oro}) + add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_hipfb}) cycles_set_solution_folder(cycles_kernel_hiprt) endif() diff --git a/intern/cycles/kernel/device/hiprt/bvh.h b/intern/cycles/kernel/device/hiprt/bvh.h index 9f5bc69caa7..54aa36dbf41 100644 --- a/intern/cycles/kernel/device/hiprt/bvh.h +++ b/intern/cycles/kernel/device/hiprt/bvh.h @@ -30,6 +30,9 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, return false; } + if (kernel_data.device_bvh == 0) + return false; + hiprtRay ray_hip; SET_HIPRT_RAY(ray_hip, ray) diff --git a/intern/cycles/kernel/device/hiprt/kernel.cpp b/intern/cycles/kernel/device/hiprt/kernel.cpp index 94670c6667a..5dcb72ef3c0 100644 --- a/intern/cycles/kernel/device/hiprt/kernel.cpp +++ b/intern/cycles/kernel/device/hiprt/kernel.cpp @@ -7,7 +7,7 @@ # include "kernel/device/hip/compat.h" # include "kernel/device/hip/config.h" -# include +# include # include "kernel/device/hiprt/globals.h" diff --git a/lib/linux_x64 b/lib/linux_x64 index d6873633784..85a8dca8860 160000 --- a/lib/linux_x64 +++ b/lib/linux_x64 @@ -1 +1 @@ -Subproject commit d6873633784d5fab59e98062e4f5b3179d62a397 +Subproject commit 85a8dca8860c7a7ee318530bcf0111d825993dd2 diff --git a/lib/windows_x64 b/lib/windows_x64 index d70d6d25313..9c935bb513b 160000 --- a/lib/windows_x64 +++ b/lib/windows_x64 @@ -1 +1 @@ -Subproject commit d70d6d253134478ed0673f080acba39a492ac619 +Subproject commit 9c935bb513b0a30da639ee09d64ea26ef98c2763