diff --git a/CMakeLists.txt b/CMakeLists.txt index 37b3161474f..d13232dc003 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -718,7 +718,8 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")) set(CYCLES_HIP_BINARIES_ARCH gfx1010 gfx1011 gfx1012 gfx1030 gfx1031 gfx1032 gfx1034 gfx1035 gfx1036 - gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 + gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 + gfx1200 gfx1201 CACHE STRING "AMD HIP architectures to build binaries for" ) mark_as_advanced(WITH_CYCLES_DEVICE_HIP) diff --git a/build_files/build_environment/CMakeLists.txt b/build_files/build_environment/CMakeLists.txt index 4603e9d49a6..295b15eabea 100644 --- a/build_files/build_environment/CMakeLists.txt +++ b/build_files/build_environment/CMakeLists.txt @@ -90,10 +90,9 @@ include(cmake/harfbuzz.cmake) if(NOT APPLE) include(cmake/xr_openxr.cmake) if(NOT BLENDER_PLATFORM_ARM) - if(HIP_FOUND) + # HIP-RT requires special SDK which is not widely available yet. + if(NOT WIN32) include(cmake/hiprt.cmake) - else() - message(STATUS "Missing HIP compiler, skipping HIPRT build") endif() include(cmake/dpcpp.cmake) include(cmake/dpcpp_deps.cmake) diff --git a/build_files/build_environment/cmake/check_compilers.cmake b/build_files/build_environment/cmake/check_compilers.cmake index 1ba96825fdc..67fdf4c7ff3 100644 --- a/build_files/build_environment/cmake/check_compilers.cmake +++ b/build_files/build_environment/cmake/check_compilers.cmake @@ -6,14 +6,14 @@ message(STATUS "Found C Compiler: ${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") if(UNIX AND NOT APPLE) - if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_C_COMPILER_VERSION MATCHES ${RELEASE_GCC_VERSION})) + if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_C_COMPILER_VERSION MATCHES "${RELEASE_GCC_VERSION}.*")) message(STATUS " NOTE: Official releases uses GCC ${RELEASE_GCC_VERSION}") endif() endif() message(STATUS "Found C++ Compiler: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") if(UNIX AND NOT APPLE) - if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_CXX_COMPILER_VERSION MATCHES ${RELEASE_GCC_VERSION})) + if(NOT CMAKE_COMPILER_IS_GNUCC OR NOT (CMAKE_CXX_COMPILER_VERSION MATCHES "${RELEASE_GCC_VERSION}.*")) message(STATUS " NOTE: Official releases uses GCC ${RELEASE_GCC_VERSION}") endif() endif() @@ -24,19 +24,18 @@ if(NOT APPLE) message(STATUS "Missing CUDA compiler") else() message(STATUS "Found CUDA Compiler: ${CUDAToolkit_NVCC_EXECUTABLE} ${CUDAToolkit_VERSION}") - if(NOT CUDAToolkit_VERSION MATCHES ${RELEASE_CUDA_VERSION}) + if(NOT CUDAToolkit_VERSION MATCHES "${RELEASE_CUDA_VERSION}.*") message(STATUS " NOTE: Official releases uses CUDA ${RELEASE_CUDA_VERSION}") endif() endif() - unset(HIP_VERSION) find_package(HIP QUIET) if(NOT HIP_FOUND) message(STATUS "Missing HIP compiler") else() message(STATUS "Found HIP Compiler: ${HIP_HIPCC_EXECUTABLE} ${HIP_VERSION}") - if(NOT HIP_VERSION MATCHES ${RELEASE_HIP_VERSION}) + if(NOT HIP_VERSION MATCHES "${RELEASE_HIP_VERSION}.*") message(STATUS " NOTE: Official releases uses HIP ${RELEASE_HIP_VERSION}") endif() endif() diff --git a/build_files/build_environment/cmake/hiprt.cmake b/build_files/build_environment/cmake/hiprt.cmake index 618e83bfe19..d64310072ea 100644 --- a/build_files/build_environment/cmake/hiprt.cmake +++ b/build_files/build_environment/cmake/hiprt.cmake @@ -2,32 +2,34 @@ # # SPDX-License-Identifier: GPL-2.0-or-later +if(NOT HIP_FOUND) + message(STATUS "Missing HIP compiler, skipping HIPRT build") + return() +endif() + +if(NOT HIP_VERSION MATCHES "${RELEASE_HIP_VERSION}.*") + message(STATUS "Wrong HIP compiler version (expected ${RELEASE_HIP_VERSION}), skipping HIPRT build") + return() +endif() + # Note the utility apps may use png/tiff/gif system libraries, but the # library itself does not depend on them, so should give no problems. -get_filename_component(_hip_path ${HIP_HIPCC_EXECUTABLE} DIRECTORY) -get_filename_component(_hip_path ${_hip_path} DIRECTORY) +get_filename_component(_hip_bin_path ${HIP_HIPCC_EXECUTABLE} DIRECTORY) +get_filename_component(_hip_path ${_hip_bin_path} DIRECTORY) set(HIPRT_EXTRA_ARGS -DCMAKE_BUILD_TYPE=Release -DHIP_PATH=${_hip_path} - -DBITCODE=ON + -DBITCODE=OFF -DGENERATE_BAKE_KERNEL=OFF -DNO_UNITTEST=ON + -DBAKE_COMPILED_KERNEL=ON + -DPRECOMPILE=ON + -DPYTHON_EXECUTABLE=${PYTHON_BINARY} + -DFORCE_DISABLE_CUDA=ON ) -if(WIN32) - # Windows is currently defaulting to HIP 5 for the buildbot and the - # dependency build environment. - list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=ON) -else() - # The Linux uses HIP 6 by default in those environments, but it had - # -DHIPRT_PREFER_HIP_5=ON passed to the dependency builder in its - # initial implementation. Force it to off so that incremental build - # in the existing build environment does the right thing. - list(APPEND HIPRT_EXTRA_ARGS -DHIPRT_PREFER_HIP_5=OFF) -endif() - set(HIPRT_SOURCE_DIR ${BUILD_DIR}/hiprt/src/external_hiprt) set(HIPRT_BUILD_DIR ${BUILD_DIR}/hiprt/src/external_hiprt-build) @@ -38,6 +40,10 @@ ExternalProject_Add(external_hiprt CMAKE_GENERATOR ${PLATFORM_ALT_GENERATOR} PREFIX ${BUILD_DIR}/hiprt + PATCH_COMMAND ${PATCH_CMD} -p 1 -d + ${BUILD_DIR}/hiprt/src/external_hiprt < + ${PATCH_DIR}/hiprt.diff + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${LIBDIR}/hiprt ${HIPRT_EXTRA_ARGS} @@ -45,6 +51,11 @@ ExternalProject_Add(external_hiprt INSTALL_DIR ${LIBDIR}/hiprt ) +add_dependencies( + external_hiprt + external_python +) + if(WIN32) # Strip version from shared library name. ExternalProject_Add_Step(external_hiprt after_install @@ -68,4 +79,5 @@ else() ) harvest(external_hiprt hiprt/include hiprt/include "*.h") harvest(external_hiprt hiprt/bin hiprt/lib "*${SHAREDLIBEXT}*") + harvest(external_hiprt hiprt/bin hiprt/lib "*.hipfb") endif() diff --git a/build_files/build_environment/cmake/versions.cmake b/build_files/build_environment/cmake/versions.cmake index f5e2b765496..563ccbc1d9a 100644 --- a/build_files/build_environment/cmake/versions.cmake +++ b/build_files/build_environment/cmake/versions.cmake @@ -8,9 +8,9 @@ # For anyone making their own library build, matching these exactly is not # needed but it can be a useful reference. -set(RELEASE_GCC_VERSION 11.2.*) -set(RELEASE_CUDA_VERSION 12.8.*) -set(RELEASE_HIP_VERSION 5.7.*) +set(RELEASE_GCC_VERSION 11.2) +set(RELEASE_CUDA_VERSION 12.8) +set(RELEASE_HIP_VERSION 6.3) # Libraries # @@ -1371,10 +1371,10 @@ set(PYBIND11_HOMEPAGE https://github.com/pybind/pybind11) set(PYBIND11_LICENSE SPDX:BSD-2-Clause) set(PYBIND11_COPYRIGHT "Copyright (c) 2016 Wenzel Jakob , All rights reserved.") -set(HIPRT_VERSION 83e18cc9c3de8f2f9c48b663cf3189361e891054) -set(HIPRT_LIBRARY_VERSION 02003) +set(HIPRT_VERSION c4298933fe046e3f915977b5a69537a59aac8f47) +set(HIPRT_LIBRARY_VERSION 02005) set(HIPRT_URI https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT/archive/${HIPRT_VERSION}.tar.gz) -set(HIPRT_HASH b5639fa06bea45eff98bea2929516f7c) +set(HIPRT_HASH 65b8a975d23db04d908c758f3fb7bae3) set(HIPRT_HASH_TYPE MD5) set(HIPRT_FILE hiprt-${HIPRT_VERSION}.tar.gz) set(HIPRT_HOMEPAGE https://github.com/GPUOpen-LibrariesAndSDKs/HIPRT) diff --git a/build_files/build_environment/linux/linux_rocky8_setup.sh b/build_files/build_environment/linux/linux_rocky8_setup.sh index 831bfd4b453..8031d0dfff2 100644 --- a/build_files/build_environment/linux/linux_rocky8_setup.sh +++ b/build_files/build_environment/linux/linux_rocky8_setup.sh @@ -200,15 +200,27 @@ yum -y install jack-audio-connection-kit-devel # - "Install kernel driver". # Register ROCm packages -rm -f /etc/yum.repos.d/rocm.repo -tee --append /etc/yum.repos.d/rocm.repo < #define HIPRT_MAJOR_VERSION 2 -#define HIPRT_MINOR_VERSION 3 -#define HIPRT_PATCH_VERSION 0x7df94af +#define HIPRT_MINOR_VERSION 5 +#define HIPRT_PATCH_VERSION 0x0201c59 -#define HIPRT_API_VERSION 2003 -#define HIPRT_VERSION_STR "02003" -#define HIP_VERSION_STR "6.0" +#define HIPRT_API_VERSION 2005 +#define HIPRT_VERSION_STR "02005" +#define HIP_VERSION_STR "6.3" #ifdef _WIN32 #define HIPRTAPI __stdcall diff --git a/intern/cycles/cmake/external_libs.cmake b/intern/cycles/cmake/external_libs.cmake index ac04ba85972..aa9dbd151fd 100644 --- a/intern/cycles/cmake/external_libs.cmake +++ b/intern/cycles/cmake/external_libs.cmake @@ -43,7 +43,7 @@ endif() ########################################################################### if(WITH_CYCLES_DEVICE_HIP) - if(WITH_CYCLES_HIP_BINARIES OR WITH_CYCLES_DEVICE_HIPRT) + if(WITH_CYCLES_HIP_BINARIES) # Need at least HIP 5.5 to solve compiler bug affecting the kernel. find_package(HIP 5.5.0) set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES) diff --git a/intern/cycles/device/hiprt/device_impl.cpp b/intern/cycles/device/hiprt/device_impl.cpp index 455cc792a38..a44e9cdfdeb 100644 --- a/intern/cycles/device/hiprt/device_impl.cpp +++ b/intern/cycles/device/hiprt/device_impl.cpp @@ -145,7 +145,7 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const std::string arch = hipDeviceArch(hipDevId); if (!use_adaptive_compilation()) { - const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb.zst", name)); + const string fatbin = path_get(string_printf("lib/%s_rt_%s.hipfb.zst", name, arch.c_str())); VLOG(1) << "Testing for pre-compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { VLOG(1) << "Using precompiled kernel."; @@ -160,16 +160,9 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const string kernel_md5 = util_md5_string(source_md5 + common_cflags); const string include_path = source_path; - const string cycles_bc = string_printf( - "cycles_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str()); - const string cycles_bitcode = path_cache_get(path_join("kernels", cycles_bc)); const string fatbin_file = string_printf( "cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str()); const string fatbin = path_cache_get(path_join("kernels", fatbin_file)); - const string hiprt_bc = string_printf( - "hiprt_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str()); - const string hiprt_bitcode = path_cache_get(path_join("kernels", hiprt_bc)); - const string hiprt_include_path = path_join(source_path, "kernel/device/hiprt"); VLOG(1) << "Testing for locally compiled kernel " << fatbin << "."; @@ -219,89 +212,35 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, path_create_directories(fatbin); - string rtc_options; - rtc_options.append(" --offload-arch=").append(arch); - rtc_options.append(" -D __HIPRT__"); - rtc_options.append(" -ffast-math -O3 -std=c++17"); - rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm"); - source_path = path_join(path_join(source_path, "kernel"), path_join("device", path_join(base, string_printf("%s.cpp", name)))); + const char *const kernel_ext = "genco"; + string options; + options.append( + "-Wno-parentheses-equality -Wno-unused-value -ffast-math -O3 -std=c++17 -D __HIPRT__"); + options.append(" --offload-arch=").append(arch.c_str()); +# ifdef WITH_NANOVDB + options.append(" -D WITH_NANOVDB"); +# endif + printf("Compiling %s and caching to %s", source_path.c_str(), fatbin.c_str()); double starttime = time_dt(); - if (!path_exists(cycles_bitcode)) { - - string command = string_printf("%s %s -I %s -I %s %s -o \"%s\"", - hipcc, - rtc_options.c_str(), - include_path.c_str(), - hiprt_include_path.c_str(), - source_path.c_str(), - cycles_bitcode.c_str()); - - printf("Compiling %sHIP kernel ...\n%s\n", - (use_adaptive_compilation()) ? "adaptive " : "", - command.c_str()); + string compile_command = string_printf("%s %s -I %s -I %s --%s %s -o \"%s\"", + hipcc, + options.c_str(), + include_path.c_str(), + hiprt_include_path.c_str(), + kernel_ext, + source_path.c_str(), + fatbin.c_str()); # ifdef _WIN32 - command = "call " + command; + compile_command = "call " + compile_command; # endif - if (system(command.c_str()) != 0) { - set_error( - "Failed to execute compilation command, " - "see console for details."); - return string(); - } - } - - if (!path_exists(hiprt_bitcode)) { - - rtc_options.append(" -x hip"); - rtc_options.append(" -D HIPRT_BITCODE_LINKING "); - - string source_path = path_join(hiprt_include_path, "/hiprt/impl/hiprt_kernels_bitcode.h"); - - string command = string_printf("%s %s -I %s %s -o \"%s\"", - hipcc, - rtc_options.c_str(), - hiprt_include_path.c_str(), - source_path.c_str(), - hiprt_bitcode.c_str()); - - printf("Compiling %sHIP kernel ...\n%s\n", - (use_adaptive_compilation()) ? "adaptive " : "", - command.c_str()); - -# ifdef _WIN32 - command = "call " + command; -# endif - if (system(command.c_str()) != 0) { - set_error( - "Failed to execute compilation command, " - "see console for details."); - return string(); - } - } - - // After compilation, the bitcode produced is linked with HIP RT bitcode (containing - // implementations of HIP RT functions, e.g. traversal, to produce the final executable code - string linker_options; - linker_options.append(" --offload-arch=").append(arch); - linker_options.append(" -fgpu-rdc --hip-link --cuda-device-only "); - - string linker_command = string_printf("clang++ %s \"%s\" \"%s\" -o \"%s\"", - linker_options.c_str(), - cycles_bitcode.c_str(), - hiprt_bitcode.c_str(), - fatbin.c_str()); - -# ifdef _WIN32 - linker_command = "call " + linker_command; -# endif - if (system(linker_command.c_str()) != 0) { + if (system(compile_command.c_str()) != 0) { set_error( "Failed to execute linking command, " "see console for details."); @@ -831,14 +770,19 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op if (rt_err != hiprtSuccess) { set_error(string_printf("Failed to create BLAS!")); } - bvh->geom_input = geom_input; { thread_scoped_lock lock(hiprt_mutex); if (blas_scratch_buffer_size > scratch_buffer_size) { scratch_buffer.alloc(blas_scratch_buffer_size); - scratch_buffer_size = blas_scratch_buffer_size; scratch_buffer.zero_to_device(); + if (!scratch_buffer.device_pointer) { + hiprtDestroyGeometry(hiprt_context, bvh->hiprt_geom); + bvh->hiprt_geom = nullptr; + return; + } + scratch_buffer_size = blas_scratch_buffer_size; } + bvh->geom_input = geom_input; rt_err = hiprtBuildGeometry(hiprt_context, hiprtBuildOperationBuild, bvh->geom_input, @@ -1003,6 +947,28 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh, blender_instance_id++; } + size_t table_ptr_size = 0; + hipDeviceptr_t table_device_ptr; + + hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule, "kernel_params")); + + size_t kernel_param_offset[4]; + int table_index = 0; + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_closest_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_shadow_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_local_intersect); + kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_volume_intersect); + + for (int index = 0; index < table_index; index++) { + + hip_assert(hipMemcpyHtoD(table_device_ptr + kernel_param_offset[index], + (void *)&functions_table, + sizeof(device_ptr))); + } + + if (num_instances == 0) + return nullptr; + int frame_count = transform_matrix.size(); hiprtSceneBuildInput scene_input_ptr = {nullptr}; scene_input_ptr.instanceCount = num_instances; @@ -1109,25 +1075,6 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh, prim_time_offset.copy_to_device(); } - size_t table_ptr_size = 0; - hipDeviceptr_t table_device_ptr; - - hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule, "kernel_params")); - - size_t kernel_param_offset[4]; - int table_index = 0; - kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_closest_intersect); - kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_shadow_intersect); - kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_local_intersect); - kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_volume_intersect); - - for (int index = 0; index < table_index; index++) { - - hip_assert(hipMemcpyHtoD(table_device_ptr + kernel_param_offset[index], - (void *)&functions_table, - sizeof(device_ptr))); - } - return scene; } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index f6395d0cf6b..59c98a91785 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -97,7 +97,6 @@ set(SRC_KERNEL_DEVICE_HIPRT_SDK hiprt/impl/hiprt_device_impl.h hiprt/impl/hiprt_kernels_bitcode.h hiprt/impl/Instance.h - hiprt/impl/Math.h hiprt/impl/QrDecomposition.h hiprt/impl/Quaternion.h hiprt/impl/Scene.h @@ -110,6 +109,7 @@ set(SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS hiprt/hiprt_device.h hiprt/hiprt_types.h hiprt/hiprt_vec.h + hiprt/hiprt_math.h ) set(SRC_KERNEL_DEVICE_OPTIX_HEADERS @@ -666,7 +666,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) set(hip_command ${CMAKE_COMMAND}) set(hip_flags -E env "HIP_PATH=${HIP_ROOT_DIR}" - ${HIP_HIPCC_EXECUTABLE}.bat) + ${HIP_HIPCC_EXECUTABLE}) else() set(hip_command ${HIP_HIPCC_EXECUTABLE}) set(hip_flags) @@ -734,26 +734,19 @@ endif() # HIP RT module -if(WITH_CYCLES_DEVICE_HIPRT) +if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIPRT) set(HIPRT_COMPILER_PARALLEL_JOBS 1 CACHE STRING "Number of parallel compiler instances to use for HIP-RT kernels") mark_as_advanced(HIPRT_COMPILER_PARALLEL_JOBS) - set(bvh_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd.hipfb) - set(bvh_file_oro ${CMAKE_CURRENT_BINARY_DIR}/oro_compiled_kernels.hipfb) - if(WIN32) set(hiprt_compile_command ${CMAKE_COMMAND}) set(hiprt_compile_flags -E env "HIP_PATH=${HIP_ROOT_DIR}" - ${HIP_HIPCC_EXECUTABLE}.bat) + ${HIP_HIPCC_EXECUTABLE}) else() set(hiprt_compile_command ${HIP_HIPCC_EXECUTABLE}) set(hiprt_compile_flags) endif() - set(target_gpus) - foreach(arch ${CYCLES_HIP_BINARIES_ARCH}) - list(APPEND target_gpus "--offload-arch=${arch}") - endforeach() if(WITH_NANOVDB) set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_NANOVDB) @@ -767,14 +760,23 @@ if(WITH_CYCLES_DEVICE_HIPRT) message(WARNING "HIP SDK ${HIP_VERSION} has known rendering artifacts with HIPRT. 5.7 is recommended instead") endif() - set(hiprt_compile_flags_bvh + set(hiprt_sources device/hiprt/kernel.cpp + ${SRC_KERNEL_HEADERS} + ${SRC_KERNEL_DEVICE_GPU_HEADERS} + ${SRC_KERNEL_DEVICE_HIPRT_HEADERS} + ${SRC_UTIL_HEADERS} + ) + + set(kernel_sources ${hiprt_sources}) + set(hiprt_kernel_src "/device/hiprt/kernel.cpp") + set(hiprt_compile_flags ${hiprt_compile_flags} - ${target_gpus} ${HIP_HIPCC_FLAGS} - -x hip - ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h - ${flags} - -D HIPRT_BITCODE_LINKING + ${CMAKE_CURRENT_SOURCE_DIR}${hiprt_kernel_src} + -D CCL_NAMESPACE_BEGIN= + -D CCL_NAMESPACE_END= + -D HIPCC + -D __HIPRT__ -std=c++17 -mllvm -amdgpu-early-inline-all=false @@ -782,157 +784,38 @@ if(WITH_CYCLES_DEVICE_HIPRT) -amdgpu-function-calls=true -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} --genco + -I ${CMAKE_CURRENT_SOURCE_DIR}/.. + -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt -I ${HIPRT_INCLUDE_DIR} -Wno-parentheses-equality -Wno-unused-value -ffast-math - -o ${bvh_file}) + ) - set(hiprt_compile_flags_bvh_oro - ${hiprt_compile_flags} - ${target_gpus} - ${HIP_HIPCC_FLAGS} - -x hip - ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h - ${flags} - -D HIPRT_BITCODE_LINKING - -std=c++17 - -mllvm - -amdgpu-early-inline-all=false - -mllvm - -amdgpu-function-calls=true - -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} - --genco - -I ${HIPRT_INCLUDE_DIR}/contrib/Orochi - -include hip/hip_runtime.h - -Wno-parentheses-equality - -Wno-unused-value - -ffast-math - -o ${bvh_file_oro}) - - add_custom_command( - OUTPUT ${bvh_file} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh} - DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h) - - add_custom_command( - OUTPUT ${bvh_file_oro} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh_oro} - DEPENDS ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h) - - delayed_install("" "${bvh_file}" ${cycles_kernel_runtime_lib_target_path}) - delayed_install("" "${bvh_file_oro}" ${cycles_kernel_runtime_lib_target_path}) - - if(WITH_CYCLES_HIP_BINARIES) - set(hiprt_sources device/hiprt/kernel.cpp - ${SRC_KERNEL_HEADERS} - ${SRC_KERNEL_DEVICE_GPU_HEADERS} - ${SRC_KERNEL_DEVICE_HIPRT_HEADERS} - ${SRC_UTIL_HEADERS} - ) - - set(cycles_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc) - set(sdk_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd_lib.bc) - set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb) + set(hiprt_hipfb) + foreach(arch ${CYCLES_HIP_BINARIES_ARCH}) + set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_${arch}.hipfb) set(hiprt_file_compressed ${hiprt_file}.zst) - set(kernel_sources ${hiprt_sources}) - set(hiprt_kernel_src "/device/hiprt/kernel.cpp") - - set(hiprt_compile_flags_sdk_bc - ${hiprt_compile_flags} - ${target_gpus} - ${HIP_HIPCC_FLAGS} - ${flags} - -x hip - ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h - -D HIPRT_BITCODE_LINKING - -std=c++17 - -fgpu-rdc - -c - --gpu-bundle-output - -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} - -emit-llvm - -I ${HIPRT_INCLUDE_DIR} - -Wno-parentheses-equality - -Wno-unused-value - -ffast-math - -o ${sdk_bitcode_file}) - - set(hiprt_compile_flags_cycles_bc - ${hiprt_compile_flags} - ${target_gpus} - ${HIP_HIPCC_FLAGS} - ${CMAKE_CURRENT_SOURCE_DIR}${hiprt_kernel_src} - ${flags} - -D CCL_NAMESPACE_BEGIN= - -D CCL_NAMESPACE_END= - -D HIPCC - -D __HIPRT__ - -std=c++17 - -fgpu-rdc - -c - --gpu-bundle-output - -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} - -emit-llvm - -I ${CMAKE_CURRENT_SOURCE_DIR}/.. - -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt - -I ${HIPRT_INCLUDE_DIR} - -Wno-parentheses-equality - -Wno-unused-value - -ffast-math - -o ${cycles_bitcode_file} - ) - - add_custom_command( - OUTPUT ${cycles_bitcode_file} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_cycles_bc} - DEPENDS ${kernel_sources} - ) - - add_custom_command( - OUTPUT ${sdk_bitcode_file} - COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_sdk_bc} - DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h - ) - - if(WIN32) - set(hiprt_link_command ${CMAKE_COMMAND}) - set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}" - ${HIP_LINKER_EXECUTABLE} - ) - else() - set(hiprt_link_command ${HIP_LINKER_EXECUTABLE}) - set(hiprt_link_flags) - endif() - - set(hiprt_link_flags - ${hiprt_link_flags} - ${target_gpus} - -fgpu-rdc - --hip-link - --cuda-device-only - -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS} - ${cycles_bitcode_file} - ${sdk_bitcode_file} - -o ${hiprt_file} - ) + set(hiprt_flags + ${hiprt_compile_flags} + --offload-arch=${arch} + -o ${hiprt_file}) add_custom_command( OUTPUT ${hiprt_file} - COMMAND ${hiprt_link_command} ${hiprt_link_flags} - DEPENDS ${cycles_bitcode_file} ${sdk_bitcode_file} + COMMAND ${hiprt_compile_command} ${hiprt_flags} + DEPENDS ${kernel_sources} ) - add_custom_command( OUTPUT ${hiprt_file_compressed} COMMAND "$" ${hiprt_file} ${hiprt_file_compressed} DEPENDS ${hiprt_file} ) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND hiprt_hipfb ${hiprt_file_compressed}) + endforeach() - endif() - - add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed} ${bvh_file} ${bvh_file_oro}) + add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_hipfb}) cycles_set_solution_folder(cycles_kernel_hiprt) endif() diff --git a/intern/cycles/kernel/device/hiprt/bvh.h b/intern/cycles/kernel/device/hiprt/bvh.h index 9f5bc69caa7..54aa36dbf41 100644 --- a/intern/cycles/kernel/device/hiprt/bvh.h +++ b/intern/cycles/kernel/device/hiprt/bvh.h @@ -30,6 +30,9 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg, return false; } + if (kernel_data.device_bvh == 0) + return false; + hiprtRay ray_hip; SET_HIPRT_RAY(ray_hip, ray) diff --git a/intern/cycles/kernel/device/hiprt/kernel.cpp b/intern/cycles/kernel/device/hiprt/kernel.cpp index 94670c6667a..5dcb72ef3c0 100644 --- a/intern/cycles/kernel/device/hiprt/kernel.cpp +++ b/intern/cycles/kernel/device/hiprt/kernel.cpp @@ -7,7 +7,7 @@ # include "kernel/device/hip/compat.h" # include "kernel/device/hip/config.h" -# include +# include # include "kernel/device/hiprt/globals.h" diff --git a/lib/linux_x64 b/lib/linux_x64 index d6873633784..85a8dca8860 160000 --- a/lib/linux_x64 +++ b/lib/linux_x64 @@ -1 +1 @@ -Subproject commit d6873633784d5fab59e98062e4f5b3179d62a397 +Subproject commit 85a8dca8860c7a7ee318530bcf0111d825993dd2 diff --git a/lib/windows_x64 b/lib/windows_x64 index d70d6d25313..9c935bb513b 160000 --- a/lib/windows_x64 +++ b/lib/windows_x64 @@ -1 +1 @@ -Subproject commit d70d6d253134478ed0673f080acba39a492ac619 +Subproject commit 9c935bb513b0a30da639ee09d64ea26ef98c2763