Cycles: Linux Support for HIP-RT
This change switches Cycles to an open-source HIP-RT library which
implements hardware ray-tracing. This library is now used on
both Windows and Linux. While there should be no noticeable changes
on Windows, on Linux this adds support for hardware ray-tracing on
AMD GPUs.
The majority of the change is typical platform code to add a new
library to the dependency builder, and a change in how
ahead-of-time (AoT) kernels are compiled. There are changes in
Cycles itself, but they are rather straightforward: some APIs
changed in the open-source version of the library.
There are a couple of extra files which are needed for this to
work: hiprt02003_6.1_amd.hipfb and oro_compiled_kernels.hipfb.
The HIP-RT library makes some assumptions about where these files
are located. Currently they follow the same rule as AoT
kernels for oneAPI:
- On Windows they are next to blender.exe
- On Linux they are in the lib/ folder
Performance comparison on Ubuntu 22.04.5:
```
GPU: AMD Radeon PRO W7800
Driver: amdgpu-install_6.1.60103-1_all.deb
main hip-rt
attic 0.1414s 0.0932s
barbershop_interior 0.1563s 0.1258s
bistro 0.2134s 0.1597s
bmw27 0.0119s 0.0099s
classroom 0.1006s 0.0803s
fishy_cat 0.0248s 0.0178s
junkshop 0.0916s 0.0713s
koro 0.0589s 0.0720s
monster 0.0435s 0.0385s
pabellon 0.0543s 0.0391s
sponza 0.0223s 0.0180s
spring 0.1026s 1.5145s
victor 0.1901s 0.1239s
wdas_cloud 0.1153s 0.1125s
```
Co-authored-by: Brecht Van Lommel <brecht@blender.org>
Co-authored-by: Ray Molenkamp <github@lazydodo.com>
Co-authored-by: Sergey Sharybin <sergey@blender.org>
Pull Request: https://projects.blender.org/blender/blender/pulls/121050
This commit is contained in:
committed by
Sergey Sharybin
parent
dfa25d3f05
commit
26ed4d3892
@@ -719,11 +719,8 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
|
||||
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
|
||||
|
||||
# HIPRT is only available on Windows for now.
|
||||
if(WIN32)
|
||||
option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF)
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT)
|
||||
endif()
|
||||
option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF)
|
||||
mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT)
|
||||
endif()
|
||||
|
||||
# Apple Metal
|
||||
@@ -2825,8 +2822,6 @@ if(FIRST_RUN)
|
||||
info_cfg_option(WITH_CYCLES_ONEAPI_BINARIES)
|
||||
info_cfg_option(WITH_CYCLES_DEVICE_HIP)
|
||||
info_cfg_option(WITH_CYCLES_HIP_BINARIES)
|
||||
endif()
|
||||
if(WIN32)
|
||||
info_cfg_option(WITH_CYCLES_DEVICE_HIPRT)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -13,3 +13,5 @@ message(STATUS "Building in Rocky 8 Linux 64bit environment")
|
||||
|
||||
set(WITH_DOC_MANPAGE OFF CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_TEST_OSL ON CACHE BOOL "" FORCE)
|
||||
|
||||
set(HIPRT_COMPILER_PARALLEL_JOBS 4 CACHE STRING "" FORCE)
|
||||
|
||||
@@ -5,3 +5,5 @@
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/../../cmake/config/blender_release.cmake")
|
||||
|
||||
set(WITH_CYCLES_TEST_OSL ON CACHE BOOL "" FORCE)
|
||||
|
||||
set(HIPRT_COMPILER_PARALLEL_JOBS 4 CACHE STRING "" FORCE)
|
||||
@@ -37,18 +37,22 @@ find_program(HIP_HIPCC_EXECUTABLE
|
||||
)
|
||||
|
||||
if(WIN32)
|
||||
# Needed for HIP-RT on Windows.
|
||||
find_program(HIP_LINKER_EXECUTABLE
|
||||
NAMES
|
||||
clang++
|
||||
HINTS
|
||||
${_hip_SEARCH_DIRS}
|
||||
set(LINKER clang++)
|
||||
else()
|
||||
set(LINKER amdclang++)
|
||||
endif()
|
||||
|
||||
find_program(HIP_LINKER_EXECUTABLE
|
||||
NAMES
|
||||
${LINKER}
|
||||
HINTS
|
||||
${_hip_SEARCH_DIRS}
|
||||
PATH_SUFFIXES
|
||||
bin
|
||||
NO_DEFAULT_PATH
|
||||
NO_CMAKE_PATH
|
||||
)
|
||||
endif()
|
||||
)
|
||||
|
||||
|
||||
if(HIP_HIPCC_EXECUTABLE)
|
||||
set(HIP_VERSION_MAJOR 0)
|
||||
@@ -95,6 +99,7 @@ if(HIP_HIPCC_EXECUTABLE)
|
||||
|
||||
# Construct full semantic version.
|
||||
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
|
||||
set(HIP_VERSION_SHORT "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}")
|
||||
unset(_hip_version_raw)
|
||||
unset(_hipcc_executable)
|
||||
endif()
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
# Find HIPRT SDK. This module defines:
|
||||
# HIPRT_INCLUDE_DIR, path to HIPRT include directory
|
||||
# HIPRT_BITCODE, bitcode file with ray-tracing functionality
|
||||
# HIPRT_FOUND, if SDK found
|
||||
|
||||
if(NOT (DEFINED HIPRT_ROOT_DIR))
|
||||
@@ -23,36 +22,32 @@ endif()
|
||||
|
||||
set(_hiprt_SEARCH_DIRS
|
||||
${HIPRT_ROOT_DIR}
|
||||
/opt/lib/hiprt
|
||||
)
|
||||
|
||||
find_path(HIPRT_INCLUDE_DIR
|
||||
NAMES
|
||||
hiprt/hiprt.h
|
||||
HINTS
|
||||
${_hiprt_SEARCH_DIRS}/include
|
||||
${_hiprt_SEARCH_DIRS}
|
||||
PATH_SUFFIXES
|
||||
include
|
||||
)
|
||||
|
||||
set(HIPRT_VERSION)
|
||||
|
||||
if(HIPRT_INCLUDE_DIR)
|
||||
file(STRINGS "${HIPRT_INCLUDE_DIR}/hiprt/hiprt.h" _hiprt_version
|
||||
REGEX "^#define HIPRT_VERSION_STR[ \t]\".*\"$")
|
||||
string(REGEX MATCHALL "[0-9]+[.0-9]+" _hiprt_version ${_hiprt_version})
|
||||
|
||||
find_file(HIPRT_BITCODE
|
||||
NAMES
|
||||
hiprt${_hiprt_version}_amd_lib_win.bc
|
||||
HINTS
|
||||
${HIPRT_ROOT_DIR}/bin
|
||||
${HIPRT_ROOT_DIR}/dist/bin/Release
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
|
||||
unset(_hiprt_version)
|
||||
string(REGEX MATCHALL "[0-9]+[.0-9]+" HIPRT_VERSION ${_hiprt_version})
|
||||
endif()
|
||||
|
||||
unset(_hiprt_version)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(HIPRT DEFAULT_MSG
|
||||
HIPRT_INCLUDE_DIR HIPRT_BITCODE)
|
||||
find_package_handle_standard_args(HIPRT
|
||||
REQUIRED_VARS HIPRT_INCLUDE_DIR HIP_LINKER_EXECUTABLE
|
||||
FAIL_MESSAGE "HIP-RT or one of its dependencies not found")
|
||||
|
||||
mark_as_advanced(
|
||||
HIPRT_INCLUDE_DIR
|
||||
|
||||
@@ -90,6 +90,7 @@ if(NOT APPLE)
|
||||
# Can't use CMAKE_SYSTEM_PROCESSOR here as it's not set yet,
|
||||
# so fall back to checking the env for vcvarsall's VSCMD_ARG_TGT_ARCH
|
||||
if(NOT (WIN32 AND "$ENV{VSCMD_ARG_TGT_ARCH}" STREQUAL "arm64"))
|
||||
set(WITH_CYCLES_DEVICE_HIPRT ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
|
||||
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
|
||||
@@ -97,7 +98,3 @@ if(NOT APPLE)
|
||||
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(WIN32 AND NOT (WIN32 AND "$ENV{VSCMD_ARG_TGT_ARCH}" STREQUAL "arm64"))
|
||||
set(WITH_CYCLES_DEVICE_HIPRT ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
|
||||
@@ -629,6 +629,8 @@ if(DEFINED LIBDIR)
|
||||
without_system_libs_end()
|
||||
endif()
|
||||
|
||||
add_bundled_libraries(hiprt/lib)
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# Build and Link Flags
|
||||
|
||||
|
||||
112
extern/hipew/include/hiprtew.h
vendored
112
extern/hipew/include/hiprtew.h
vendored
@@ -20,63 +20,141 @@
|
||||
#include <hiprt/hiprt_types.h>
|
||||
|
||||
#define HIPRT_MAJOR_VERSION 2
|
||||
#define HIPRT_MINOR_VERSION 0
|
||||
#define HIPRT_PATCH_VERSION 0xb68861
|
||||
#define HIPRT_MINOR_VERSION 3
|
||||
#define HIPRT_PATCH_VERSION 0x7df94af
|
||||
|
||||
#define HIPRT_API_VERSION 2000
|
||||
#define HIPRT_VERSION_STR "02000"
|
||||
#define HIPRT_API_VERSION 2003
|
||||
#define HIPRT_VERSION_STR "02003"
|
||||
#define HIP_VERSION_STR "6.0"
|
||||
|
||||
#ifdef _WIN32
|
||||
#define HIPRTAPI __stdcall
|
||||
#else
|
||||
#define HIPRTAPI
|
||||
#define HIP_CB
|
||||
#endif
|
||||
|
||||
typedef unsigned int hiprtuint32_t;
|
||||
|
||||
/* Function types. */
|
||||
typedef hiprtError(thiprtCreateContext)(hiprtuint32_t hiprtApiVersion,
|
||||
hiprtContextCreationInput &input,
|
||||
const hiprtContextCreationInput &input,
|
||||
hiprtContext *outContext);
|
||||
typedef hiprtError(thiprtDestroyContext)(hiprtContext context);
|
||||
typedef hiprtError(thiprtCreateGeometry)(hiprtContext context,
|
||||
const hiprtGeometryBuildInput *buildInput,
|
||||
const hiprtBuildOptions *buildOptions,
|
||||
hiprtGeometry *outGeometry);
|
||||
const hiprtGeometryBuildInput &buildInput,
|
||||
const hiprtBuildOptions buildOptions,
|
||||
hiprtGeometry &outGeometry);
|
||||
typedef hiprtError(thiprtDestroyGeometry)(hiprtContext context,
|
||||
hiprtGeometry outGeometry);
|
||||
typedef hiprtError(thiprtCreateGeometries)(hiprtContext context,
|
||||
uint32_t numGeometries,
|
||||
const hiprtGeometryBuildInput *buildInput,
|
||||
const hiprtBuildOptions buildOptions,
|
||||
hiprtGeometry **outGeometries);
|
||||
typedef hiprtError(thiprtDestroyGeometries)(hiprtContext context, uint32_t numGeometries,
|
||||
hiprtGeometry* outGeometry);
|
||||
|
||||
typedef hiprtError(thiprtBuildGeometry)(hiprtContext context,
|
||||
hiprtBuildOperation buildOperation,
|
||||
const hiprtGeometryBuildInput &buildInput,
|
||||
const hiprtBuildOptions buildOptions,
|
||||
hiprtDevicePtr temporaryBuffer,
|
||||
hiprtApiStream stream,
|
||||
hiprtGeometry outGeometry);
|
||||
|
||||
typedef hiprtError(thiprtBuildGeometries)(hiprtContext context,
|
||||
uint32_t numGeometries,
|
||||
hiprtBuildOperation buildOperation,
|
||||
const hiprtGeometryBuildInput *buildInput,
|
||||
const hiprtBuildOptions *buildOptions,
|
||||
hiprtDevicePtr temporaryBuffer,
|
||||
hiprtApiStream stream,
|
||||
hiprtGeometry outGeometry);
|
||||
hiprtGeometry *outGeometries);
|
||||
|
||||
|
||||
typedef hiprtError(thiprtGetGeometryBuildTemporaryBufferSize)(
|
||||
hiprtContext context,
|
||||
const hiprtGeometryBuildInput &buildInput,
|
||||
const hiprtBuildOptions buildOptions,
|
||||
size_t &outSize);
|
||||
|
||||
typedef hiprtError(thiprtGetGeometriesBuildTemporaryBufferSize)(
|
||||
hiprtContext context,
|
||||
uint32_t numGeometries,
|
||||
const hiprtGeometryBuildInput *buildInput,
|
||||
const hiprtBuildOptions *buildOptions,
|
||||
size_t *outSize);
|
||||
size_t &outSize);
|
||||
|
||||
typedef hiprtError(thiprtCompactGeometry)( hiprtContext context, hiprtApiStream stream, hiprtGeometry geometryIn, hiprtGeometry& geometryOut);
|
||||
|
||||
typedef hiprtError(thiprtCompactGeometries)(
|
||||
hiprtContext context,
|
||||
uint32_t numGeometries,
|
||||
hiprtApiStream stream,
|
||||
hiprtGeometry* geometriesIn,
|
||||
hiprtGeometry** geometriesOut );
|
||||
|
||||
typedef hiprtError(thiprtCreateScene)(hiprtContext context,
|
||||
const hiprtSceneBuildInput &buildInput,
|
||||
const hiprtBuildOptions buildOptions,
|
||||
hiprtScene &outScene);
|
||||
|
||||
typedef hiprtError(thiprtCreateScenes)(hiprtContext context,
|
||||
uint32_t numScenes,
|
||||
const hiprtSceneBuildInput *buildInput,
|
||||
const hiprtBuildOptions *buildOptions,
|
||||
hiprtScene *outScene);
|
||||
const hiprtBuildOptions buildOptions,
|
||||
hiprtScene **outScene);
|
||||
|
||||
typedef hiprtError(thiprtDestroyScene)(hiprtContext context, hiprtScene outScene);
|
||||
typedef hiprtError(thiprtDestroyScenes)( hiprtContext context, uint32_t numScenes,hiprtScene *scene );
|
||||
typedef hiprtError(thiprtBuildScene)(hiprtContext context,
|
||||
hiprtBuildOperation buildOperation,
|
||||
const hiprtSceneBuildInput &buildInput,
|
||||
const hiprtBuildOptions buildOptions,
|
||||
hiprtDevicePtr temporaryBuffer,
|
||||
hiprtApiStream stream,
|
||||
hiprtScene outScene);
|
||||
typedef hiprtError(thiprtBuildScenes)(hiprtContext context,
|
||||
uint32_t numScenes,
|
||||
hiprtBuildOperation buildOperation,
|
||||
const hiprtSceneBuildInput *buildInput,
|
||||
const hiprtBuildOptions *buildOptions,
|
||||
hiprtDevicePtr temporaryBuffer,
|
||||
hiprtApiStream stream,
|
||||
hiprtScene outScene);
|
||||
hiprtScene *outScene);
|
||||
typedef hiprtError(thiprtGetSceneBuildTemporaryBufferSize)(
|
||||
hiprtContext context,
|
||||
const hiprtSceneBuildInput &buildInput,
|
||||
const hiprtBuildOptions buildOptions,
|
||||
size_t &outSize);
|
||||
|
||||
typedef hiprtError(thiprtGetScenesBuildTemporaryBufferSize)(
|
||||
hiprtContext context,
|
||||
uint32_t numScenes,
|
||||
const hiprtSceneBuildInput *buildInput,
|
||||
const hiprtBuildOptions *buildOptions,
|
||||
size_t *outSize);
|
||||
const hiprtBuildOptions buildOptions,
|
||||
size_t &outSize);
|
||||
|
||||
typedef hiprtError(thiprtCompactScene)( hiprtContext context, hiprtApiStream stream, hiprtScene sceneIn, hiprtScene& sceneOut );
|
||||
|
||||
typedef hiprtError(thiprtCompactScenes)(
|
||||
hiprtContext context, uint32_t numScenes, hiprtApiStream stream, hiprtScene* scenesIn, hiprtScene** scenesOut );
|
||||
|
||||
typedef hiprtError(thiprtCreateFuncTable)(hiprtContext context,
|
||||
hiprtuint32_t numGeomTypes,
|
||||
hiprtuint32_t numRayTypes,
|
||||
hiprtFuncTable *outFuncTable);
|
||||
hiprtFuncTable &outFuncTable);
|
||||
typedef hiprtError(thiprtSetFuncTable)(hiprtContext context,
|
||||
hiprtFuncTable funcTable,
|
||||
hiprtuint32_t geomType,
|
||||
hiprtuint32_t rayType,
|
||||
hiprtFuncDataSet set);
|
||||
|
||||
|
||||
typedef hiprtError (thiprtCreateGlobalStackBuffer)(hiprtContext context, const hiprtGlobalStackBufferInput& input, hiprtGlobalStackBuffer& stackBufferOut );
|
||||
typedef hiprtError (thiprtDestroyGlobalStackBuffer)( hiprtContext context, hiprtGlobalStackBuffer stackBuffer );
|
||||
|
||||
typedef hiprtError(thiprtDestroyFuncTable)(hiprtContext context,
|
||||
hiprtFuncTable funcTable);
|
||||
typedef void(thiprtSetLogLevel)( hiprtLogLevel level );
|
||||
@@ -94,6 +172,8 @@ extern thiprtBuildScene *hiprtBuildScene;
|
||||
extern thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
|
||||
extern thiprtCreateFuncTable *hiprtCreateFuncTable;
|
||||
extern thiprtSetFuncTable *hiprtSetFuncTable;
|
||||
extern thiprtCreateGlobalStackBuffer *hiprtCreateGlobalStackBuffer;
|
||||
extern thiprtDestroyGlobalStackBuffer *hiprtDestroyGlobalStackBuffer;
|
||||
extern thiprtDestroyFuncTable *hiprtDestroyFuncTable;
|
||||
extern thiprtSetLogLevel *hiprtSetLogLevel;
|
||||
|
||||
|
||||
1
extern/hipew/src/hipew.c
vendored
1
extern/hipew/src/hipew.c
vendored
@@ -234,7 +234,6 @@ static int hipewHipInit(void) {
|
||||
#ifdef _WIN32
|
||||
/* Expected in C:/Windows/System32 or similar, no path needed. */
|
||||
const char *hip_paths[] = {"amdhip64.dll", "amdhip64_6.dll", NULL};
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
/* Default installation path. */
|
||||
const char *hip_paths[] = {"", NULL};
|
||||
|
||||
13
extern/hipew/src/hiprtew.cc
vendored
13
extern/hipew/src/hiprtew.cc
vendored
@@ -40,6 +40,8 @@ thiprtBuildScene *hiprtBuildScene;
|
||||
thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
|
||||
thiprtCreateFuncTable *hiprtCreateFuncTable;
|
||||
thiprtSetFuncTable *hiprtSetFuncTable;
|
||||
thiprtCreateGlobalStackBuffer *hiprtCreateGlobalStackBuffer;
|
||||
thiprtDestroyGlobalStackBuffer *hiprtDestroyGlobalStackBuffer;
|
||||
thiprtDestroyFuncTable *hiprtDestroyFuncTable;
|
||||
thiprtSetLogLevel *hiprtSetLogLevel;
|
||||
|
||||
@@ -61,15 +63,17 @@ bool hiprtewInit()
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
initialized = true;
|
||||
|
||||
if (atexit(hipewHipRtExit)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string hiprt_ver(HIPRT_VERSION_STR);
|
||||
std::string hiprt_path = "hiprt" + hiprt_ver + "64.dll";
|
||||
#ifdef _WIN32
|
||||
std::string hiprt_path = "hiprt64.dll";
|
||||
#else
|
||||
std::string hiprt_path = "libhiprt64.so";
|
||||
#endif
|
||||
|
||||
hiprt_lib = dynamic_library_open(hiprt_path.c_str());
|
||||
|
||||
@@ -89,11 +93,12 @@ bool hiprtewInit()
|
||||
HIPRT_LIBRARY_FIND(hiprtGetSceneBuildTemporaryBufferSize)
|
||||
HIPRT_LIBRARY_FIND(hiprtCreateFuncTable)
|
||||
HIPRT_LIBRARY_FIND(hiprtSetFuncTable)
|
||||
HIPRT_LIBRARY_FIND(hiprtCreateGlobalStackBuffer)
|
||||
HIPRT_LIBRARY_FIND(hiprtDestroyFuncTable)
|
||||
HIPRT_LIBRARY_FIND(hiprtDestroyGlobalStackBuffer)
|
||||
HIPRT_LIBRARY_FIND(hiprtSetLogLevel)
|
||||
|
||||
result = true;
|
||||
#endif
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -1831,10 +1831,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):
|
||||
|
||||
if compute_device_type == 'HIP':
|
||||
import platform
|
||||
if platform.system() == "Windows": # HIP-RT is currently only supported on Windows
|
||||
row = layout.row()
|
||||
row.active = has_rt_api_support['HIP']
|
||||
row.prop(self, "use_hiprt")
|
||||
row = layout.row()
|
||||
row.active = has_rt_api_support['HIP']
|
||||
row.prop(self, "use_hiprt")
|
||||
|
||||
elif compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
|
||||
row = layout.row()
|
||||
|
||||
@@ -43,7 +43,7 @@ endif()
|
||||
###########################################################################
|
||||
|
||||
if(WITH_CYCLES_DEVICE_HIP)
|
||||
if(WITH_CYCLES_HIP_BINARIES)
|
||||
if(WITH_CYCLES_HIP_BINARIES OR WITH_CYCLES_DEVICE_HIPRT)
|
||||
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
|
||||
find_package(HIP 5.5.0)
|
||||
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
|
||||
@@ -55,6 +55,9 @@ if(WITH_CYCLES_DEVICE_HIP)
|
||||
|
||||
# HIP RT
|
||||
if(WITH_CYCLES_DEVICE_HIP AND WITH_CYCLES_DEVICE_HIPRT)
|
||||
if(DEFINED LIBDIR)
|
||||
set(HIPRT_ROOT_DIR ${LIBDIR}/hiprt)
|
||||
endif()
|
||||
find_package(HIPRT)
|
||||
set_and_warn_library_found("HIP RT" HIPRT_FOUND WITH_CYCLES_DEVICE_HIPRT)
|
||||
endif()
|
||||
|
||||
@@ -59,7 +59,6 @@ BVHLayoutMask HIPRTDevice::get_bvh_layout_mask(const uint /* kernel_features */)
|
||||
|
||||
HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
|
||||
: HIPDevice(info, stats, profiler, headless),
|
||||
global_stack_buffer(this, "global_stack_buffer", MEM_DEVICE_ONLY),
|
||||
hiprt_context(NULL),
|
||||
scene(NULL),
|
||||
functions_table(NULL),
|
||||
@@ -77,6 +76,7 @@ HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
prim_time_offset(this, "prim_time_offset", MEM_GLOBAL)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
global_stack_buffer = {0};
|
||||
hiprtContextCreationInput hiprt_context_input = {0};
|
||||
hiprt_context_input.ctxt = hipContext;
|
||||
hiprt_context_input.device = hipDevice;
|
||||
@@ -90,7 +90,7 @@ HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
||||
}
|
||||
|
||||
rt_result = hiprtCreateFuncTable(
|
||||
hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, &functions_table);
|
||||
hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, functions_table);
|
||||
|
||||
if (rt_result != hiprtSuccess) {
|
||||
set_error(string_printf("Failed to create HIPRT Function Table"));
|
||||
@@ -113,7 +113,8 @@ HIPRTDevice::~HIPRTDevice()
|
||||
custom_prim_info.free();
|
||||
prim_time_offset.free();
|
||||
prims_time.free();
|
||||
global_stack_buffer.free();
|
||||
|
||||
hiprtDestroyGlobalStackBuffer(hiprt_context, global_stack_buffer);
|
||||
hiprtDestroyFuncTable(hiprt_context, functions_table);
|
||||
hiprtDestroyScene(hiprt_context, scene);
|
||||
hiprtDestroyContext(hiprt_context);
|
||||
@@ -156,12 +157,17 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
|
||||
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
|
||||
|
||||
const string include_path = source_path;
|
||||
const string bitcode_file = string_printf(
|
||||
const string cycles_bc = string_printf(
|
||||
"cycles_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str());
|
||||
const string bitcode = path_cache_get(path_join("kernels", bitcode_file));
|
||||
const string cycles_bitcode = path_cache_get(path_join("kernels", cycles_bc));
|
||||
const string fatbin_file = string_printf(
|
||||
"cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str());
|
||||
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
|
||||
const string hiprt_bc = string_printf(
|
||||
"hiprt_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str());
|
||||
const string hiprt_bitcode = path_cache_get(path_join("kernels", hiprt_bc));
|
||||
|
||||
const string hiprt_include_path = path_join(source_path, "kernel/device/hiprt");
|
||||
|
||||
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
|
||||
if (path_exists(fatbin)) {
|
||||
@@ -210,6 +216,12 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
|
||||
|
||||
path_create_directories(fatbin);
|
||||
|
||||
string rtc_options;
|
||||
rtc_options.append(" --offload-arch=").append(arch.c_str());
|
||||
rtc_options.append(" -D __HIPRT__");
|
||||
rtc_options.append(" -ffast-math -O3 -std=c++17");
|
||||
rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm");
|
||||
|
||||
source_path = path_join(path_join(source_path, "kernel"),
|
||||
path_join("device", path_join(base, string_printf("%s.cpp", name))));
|
||||
|
||||
@@ -217,25 +229,44 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
|
||||
|
||||
double starttime = time_dt();
|
||||
|
||||
const string hiprt_path = getenv("HIPRT_ROOT_DIR");
|
||||
// First, app kernels are compiled into bitcode, without access to implementation of HIP RT
|
||||
// functions
|
||||
if (!path_exists(bitcode)) {
|
||||
|
||||
std::string rtc_options;
|
||||
|
||||
rtc_options.append(" --offload-arch=").append(arch.c_str());
|
||||
rtc_options.append(" -D __HIPRT__");
|
||||
rtc_options.append(" -ffast-math -O3 -std=c++17");
|
||||
rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm");
|
||||
if (!path_exists(cycles_bitcode)) {
|
||||
|
||||
string command = string_printf("%s %s -I %s -I %s %s -o \"%s\"",
|
||||
hipcc,
|
||||
rtc_options.c_str(),
|
||||
include_path.c_str(),
|
||||
hiprt_path.c_str(),
|
||||
hiprt_include_path.c_str(),
|
||||
source_path.c_str(),
|
||||
bitcode.c_str());
|
||||
cycles_bitcode.c_str());
|
||||
|
||||
printf("Compiling %sHIP kernel ...\n%s\n",
|
||||
(use_adaptive_compilation()) ? "adaptive " : "",
|
||||
command.c_str());
|
||||
|
||||
# ifdef _WIN32
|
||||
command = "call " + command;
|
||||
# endif
|
||||
if (system(command.c_str()) != 0) {
|
||||
set_error(
|
||||
"Failed to execute compilation command, "
|
||||
"see console for details.");
|
||||
return string();
|
||||
}
|
||||
}
|
||||
|
||||
if (!path_exists(hiprt_bitcode)) {
|
||||
|
||||
rtc_options.append(" -x hip");
|
||||
rtc_options.append(" -D HIPRT_BITCODE_LINKING ");
|
||||
|
||||
string source_path = path_join(hiprt_include_path, "/hiprt/impl/hiprt_kernels_bitcode.h");
|
||||
|
||||
string command = string_printf("%s %s -I %s %s -o \"%s\"",
|
||||
hipcc,
|
||||
rtc_options.c_str(),
|
||||
hiprt_include_path.c_str(),
|
||||
source_path.c_str(),
|
||||
hiprt_bitcode.c_str());
|
||||
|
||||
printf("Compiling %sHIP kernel ...\n%s\n",
|
||||
(use_adaptive_compilation()) ? "adaptive " : "",
|
||||
@@ -257,13 +288,11 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
|
||||
string linker_options;
|
||||
linker_options.append(" --offload-arch=").append(arch.c_str());
|
||||
linker_options.append(" -fgpu-rdc --hip-link --cuda-device-only ");
|
||||
string hiprt_ver(HIPRT_VERSION_STR);
|
||||
string hiprt_bc = hiprt_path + "\\dist\\bin\\Release\\hiprt" + hiprt_ver + "_amd_lib_win.bc";
|
||||
|
||||
string linker_command = string_printf("clang++ %s \"%s\" %s -o \"%s\"",
|
||||
string linker_command = string_printf("clang++ %s \"%s\" \"%s\" -o \"%s\"",
|
||||
linker_options.c_str(),
|
||||
bitcode.c_str(),
|
||||
hiprt_bc.c_str(),
|
||||
cycles_bitcode.c_str(),
|
||||
hiprt_bitcode.c_str(),
|
||||
fatbin.c_str());
|
||||
|
||||
# ifdef _WIN32
|
||||
@@ -458,7 +487,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *
|
||||
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
|
||||
|
||||
geom_input.type = hiprtPrimitiveTypeAABBList;
|
||||
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
|
||||
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
|
||||
geom_input.geomType = Motion_Triangle;
|
||||
}
|
||||
else {
|
||||
@@ -490,7 +519,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *
|
||||
bvh->vertex_data.host_pointer = 0;
|
||||
|
||||
geom_input.type = hiprtPrimitiveTypeTriangleMesh;
|
||||
geom_input.triangleMesh.primitive = &(bvh->triangle_mesh);
|
||||
geom_input.primitive.triangleMesh = bvh->triangle_mesh;
|
||||
}
|
||||
|
||||
return geom_input;
|
||||
@@ -629,7 +658,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh, Hair *hai
|
||||
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
|
||||
|
||||
geom_input.type = hiprtPrimitiveTypeAABBList;
|
||||
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
|
||||
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
|
||||
geom_input.geomType = Curve;
|
||||
|
||||
return geom_input;
|
||||
@@ -732,7 +761,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh, PointClou
|
||||
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
|
||||
|
||||
geom_input.type = hiprtPrimitiveTypeAABBList;
|
||||
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
|
||||
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
|
||||
geom_input.geomType = Point;
|
||||
|
||||
return geom_input;
|
||||
@@ -779,13 +808,13 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op
|
||||
|
||||
size_t blas_scratch_buffer_size = 0;
|
||||
hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize(
|
||||
hiprt_context, &geom_input, &options, &blas_scratch_buffer_size);
|
||||
hiprt_context, geom_input, options, blas_scratch_buffer_size);
|
||||
|
||||
if (rt_err != hiprtSuccess) {
|
||||
set_error(string_printf("Failed to get scratch buffer size for BLAS!"));
|
||||
}
|
||||
|
||||
rt_err = hiprtCreateGeometry(hiprt_context, &geom_input, &options, &bvh->hiprt_geom);
|
||||
rt_err = hiprtCreateGeometry(hiprt_context, geom_input, options, bvh->hiprt_geom);
|
||||
|
||||
if (rt_err != hiprtSuccess) {
|
||||
set_error(string_printf("Failed to create BLAS!"));
|
||||
@@ -800,8 +829,8 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op
|
||||
}
|
||||
rt_err = hiprtBuildGeometry(hiprt_context,
|
||||
hiprtBuildOperationBuild,
|
||||
&bvh->geom_input,
|
||||
&options,
|
||||
bvh->geom_input,
|
||||
options,
|
||||
(void *)(scratch_buffer.device_pointer),
|
||||
0,
|
||||
bvh->hiprt_geom);
|
||||
@@ -951,7 +980,8 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
|
||||
|
||||
user_instance_id[num_instances] = blender_instance_id;
|
||||
prim_visibility[num_instances] = mask;
|
||||
hiprt_blas_ptr[num_instances] = (uint64_t)hiprt_geom_current;
|
||||
hiprt_blas_ptr[num_instances].geometry = hiprt_geom_current;
|
||||
hiprt_blas_ptr[num_instances].type = hiprtInstanceTypeGeometry;
|
||||
num_instances++;
|
||||
}
|
||||
blas_ptr[blender_instance_id] = (uint64_t)hiprt_geom_current;
|
||||
@@ -981,13 +1011,13 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
|
||||
}
|
||||
|
||||
scene_input_ptr.instanceMasks = (void *)prim_visibility.device_pointer;
|
||||
scene_input_ptr.instanceGeometries = (void *)hiprt_blas_ptr.device_pointer;
|
||||
scene_input_ptr.instances = (void *)hiprt_blas_ptr.device_pointer;
|
||||
scene_input_ptr.instanceTransformHeaders = (void *)transform_headers.device_pointer;
|
||||
scene_input_ptr.instanceFrames = (void *)instance_transform_matrix.device_pointer;
|
||||
|
||||
hiprtScene scene = 0;
|
||||
|
||||
hiprtError rt_err = hiprtCreateScene(hiprt_context, &scene_input_ptr, &options, &scene);
|
||||
hiprtError rt_err = hiprtCreateScene(hiprt_context, scene_input_ptr, options, scene);
|
||||
|
||||
if (rt_err != hiprtSuccess) {
|
||||
set_error(string_printf("Failed to create TLAS"));
|
||||
@@ -995,7 +1025,7 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
|
||||
|
||||
size_t tlas_scratch_buffer_size;
|
||||
rt_err = hiprtGetSceneBuildTemporaryBufferSize(
|
||||
hiprt_context, &scene_input_ptr, &options, &tlas_scratch_buffer_size);
|
||||
hiprt_context, scene_input_ptr, options, tlas_scratch_buffer_size);
|
||||
|
||||
if (rt_err != hiprtSuccess) {
|
||||
set_error(string_printf("Failed to get scratch buffer size for TLAS"));
|
||||
@@ -1008,8 +1038,8 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
|
||||
|
||||
rt_err = hiprtBuildScene(hiprt_context,
|
||||
build_operation,
|
||||
&scene_input_ptr,
|
||||
&options,
|
||||
scene_input_ptr,
|
||||
options,
|
||||
(void *)scratch_buffer.device_pointer,
|
||||
0,
|
||||
scene);
|
||||
|
||||
@@ -53,7 +53,7 @@ class HIPRTDevice : public HIPDevice {
|
||||
return hiprt_context;
|
||||
}
|
||||
|
||||
device_vector<int> global_stack_buffer;
|
||||
hiprtGlobalStackBuffer global_stack_buffer;
|
||||
|
||||
protected:
|
||||
enum Filter_Function { Closest = 0, Shadows, Local, Volume, Max_Intersect_Filter_Function };
|
||||
@@ -111,7 +111,7 @@ class HIPRTDevice : public HIPDevice {
|
||||
* blas_ptr has all the valid pointers and null pointers and blas for any geometry can be
|
||||
* directly retrieved from this array (used in subsurface scattering). */
|
||||
device_vector<int> user_instance_id;
|
||||
device_vector<uint64_t> hiprt_blas_ptr;
|
||||
device_vector<hiprtInstance> hiprt_blas_ptr;
|
||||
device_vector<uint64_t> blas_ptr;
|
||||
|
||||
/* custom_prim_info stores custom information for custom primitives for all the primitives in a
|
||||
|
||||
@@ -34,14 +34,25 @@ bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
|
||||
const HIPContextScope scope(hiprt_device_);
|
||||
const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
|
||||
|
||||
if (!hiprt_device_->global_stack_buffer.device_pointer) {
|
||||
int max_path = num_concurrent_states(0);
|
||||
hiprt_device_->global_stack_buffer.alloc(max_path * HIPRT_SHARED_STACK_SIZE * sizeof(int));
|
||||
hiprt_device_->global_stack_buffer.zero_to_device();
|
||||
if (!hiprt_device_->global_stack_buffer.stackData) {
|
||||
uint32_t max_path = num_concurrent_states(0);
|
||||
hiprtGlobalStackBufferInput stack_buffer_input{
|
||||
hiprtStackTypeGlobal, hiprtStackEntryTypeInteger, HIPRT_THREAD_STACK_SIZE, max_path};
|
||||
|
||||
hiprtError rt_result = hiprtCreateGlobalStackBuffer(hiprt_device_->get_hiprt_context(),
|
||||
stack_buffer_input,
|
||||
hiprt_device_->global_stack_buffer);
|
||||
|
||||
if (rt_result != hiprtSuccess) {
|
||||
LOG(ERROR) << "Failed to create hiprt Global Stack Buffer";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
DeviceKernelArguments args_copy = args;
|
||||
args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer);
|
||||
args_copy.add(DeviceKernelArguments::HIPRT_GLOBAL_STACK,
|
||||
(void *)(&hiprt_device_->global_stack_buffer),
|
||||
sizeof(hiprtGlobalStackBuffer));
|
||||
|
||||
/* Compute kernel launch parameters. */
|
||||
const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;
|
||||
|
||||
@@ -28,6 +28,7 @@ struct DeviceKernelArguments {
|
||||
INT32,
|
||||
FLOAT32,
|
||||
KERNEL_FILM_CONVERT,
|
||||
HIPRT_GLOBAL_STACK,
|
||||
};
|
||||
|
||||
static const int MAX_ARGS = 18;
|
||||
|
||||
@@ -92,6 +92,28 @@ set(SRC_KERNEL_DEVICE_HIPRT_HEADERS
|
||||
device/hiprt/hiprt_kernels.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_HIPRT_SDK
|
||||
hiprt/impl/Aabb.h
|
||||
hiprt/impl/BvhNode.h
|
||||
hiprt/impl/Geometry.h
|
||||
hiprt/impl/hiprt_device_impl.h
|
||||
hiprt/impl/hiprt_kernels_bitcode.h
|
||||
hiprt/impl/Instance.h
|
||||
hiprt/impl/Math.h
|
||||
hiprt/impl/QrDecomposition.h
|
||||
hiprt/impl/Quaternion.h
|
||||
hiprt/impl/Scene.h
|
||||
hiprt/impl/Transform.h
|
||||
hiprt/impl/Triangle.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS
|
||||
hiprt/hiprt_common.h
|
||||
hiprt/hiprt_device.h
|
||||
hiprt/hiprt_types.h
|
||||
hiprt/hiprt_vec.h
|
||||
)
|
||||
|
||||
set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
|
||||
device/optix/bvh.h
|
||||
device/optix/compat.h
|
||||
@@ -422,6 +444,21 @@ add_executable(zstd_compress ../cmake/zstd_compress.cpp)
|
||||
target_include_directories(zstd_compress SYSTEM PRIVATE ${ZSTD_INCLUDE_DIRS})
|
||||
target_link_libraries(zstd_compress ${ZSTD_LIBRARIES} ${PTHREADS_LIBRARIES})
|
||||
|
||||
if(NOT WITH_BLENDER)
|
||||
# For the Cycles standalone put libraries next to the Cycles application.
|
||||
set(cycles_kernel_runtime_lib_target_path ${CYCLES_INSTALL_PATH})
|
||||
else()
|
||||
# For Blender put the libraries next to the Blender executable.
|
||||
#
|
||||
# Note that the installation path in the delayed_install is relative to the versioned folder,
|
||||
# which means we need to go one level up.
|
||||
set(cycles_kernel_runtime_lib_target_path "../")
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE)
|
||||
set(cycles_kernel_runtime_lib_target_path ${cycles_kernel_runtime_lib_target_path}/lib)
|
||||
endif()
|
||||
|
||||
# CUDA module
|
||||
|
||||
if(WITH_CYCLES_CUDA_BINARIES)
|
||||
@@ -689,17 +726,13 @@ endif()
|
||||
|
||||
# HIP RT module
|
||||
|
||||
if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
|
||||
set(hiprt_sources device/hiprt/kernel.cpp
|
||||
${SRC_KERNEL_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
|
||||
${SRC_UTIL_HEADERS})
|
||||
set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
|
||||
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
|
||||
set(hiprt_file_compressed ${hiprt_file}.zst)
|
||||
set(kernel_sources ${hiprt_sources})
|
||||
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
|
||||
if(WITH_CYCLES_DEVICE_HIPRT)
|
||||
# User-tunable cache value forwarded to the compiler as -parallel-jobs=<N> when building the HIP-RT kernels.
set(HIPRT_COMPILER_PARALLEL_JOBS 1 CACHE STRING "Number of parallel compiler instances to use for HIP-RT kernels")
|
||||
mark_as_advanced(HIPRT_COMPILER_PARALLEL_JOBS)
|
||||
|
||||
set(bvh_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd.hipfb)
|
||||
set(bvh_file_oro ${CMAKE_CURRENT_BINARY_DIR}/oro_compiled_kernels.hipfb)
|
||||
|
||||
if(WIN32)
|
||||
set(hiprt_compile_command ${CMAKE_COMMAND})
|
||||
set(hiprt_compile_flags
|
||||
@@ -713,7 +746,106 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
|
||||
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
|
||||
list(APPEND target_gpus "--offload-arch=${arch}")
|
||||
endforeach()
|
||||
set(hiprt_compile_flags
|
||||
|
||||
if(WITH_NANOVDB)
|
||||
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_NANOVDB)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG)
|
||||
endif()
|
||||
|
||||
set(hiprt_compile_flags_bvh
|
||||
${hiprt_compile_flags}
|
||||
${target_gpus}
|
||||
${HIP_HIPCC_FLAGS}
|
||||
-x hip
|
||||
${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h
|
||||
${flags}
|
||||
-D HIPRT_BITCODE_LINKING
|
||||
-std=c++17
|
||||
-mllvm
|
||||
-amdgpu-early-inline-all=false
|
||||
-mllvm
|
||||
-amdgpu-function-calls=true
|
||||
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
|
||||
--genco
|
||||
-I ${HIPRT_INCLUDE_DIR}
|
||||
-Wno-parentheses-equality
|
||||
-Wno-unused-value
|
||||
-ffast-math
|
||||
-o ${bvh_file})
|
||||
|
||||
set(hiprt_compile_flags_bvh_oro
|
||||
${hiprt_compile_flags}
|
||||
${target_gpus}
|
||||
${HIP_HIPCC_FLAGS}
|
||||
-x hip
|
||||
${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h
|
||||
${flags}
|
||||
-D HIPRT_BITCODE_LINKING
|
||||
-std=c++17
|
||||
-mllvm
|
||||
-amdgpu-early-inline-all=false
|
||||
-mllvm
|
||||
-amdgpu-function-calls=true
|
||||
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
|
||||
--genco
|
||||
-I ${HIPRT_INCLUDE_DIR}/contrib/Orochi
|
||||
-include hip/hip_runtime.h
|
||||
-Wno-parentheses-equality
|
||||
-Wno-unused-value
|
||||
-ffast-math
|
||||
-o ${bvh_file_oro})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${bvh_file}
|
||||
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh}
|
||||
DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${bvh_file_oro}
|
||||
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh_oro}
|
||||
DEPENDS ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h)
|
||||
|
||||
delayed_install("" "${bvh_file}" ${cycles_kernel_runtime_lib_target_path})
|
||||
delayed_install("" "${bvh_file_oro}" ${cycles_kernel_runtime_lib_target_path})
|
||||
|
||||
if(WITH_CYCLES_HIP_BINARIES)
|
||||
set(hiprt_sources device/hiprt/kernel.cpp
|
||||
${SRC_KERNEL_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
||||
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
|
||||
${SRC_UTIL_HEADERS})
|
||||
|
||||
set(cycles_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
|
||||
set(sdk_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd_lib.bc)
|
||||
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
|
||||
set(hiprt_file_compressed ${hiprt_file}.zst)
|
||||
set(kernel_sources ${hiprt_sources})
|
||||
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
|
||||
|
||||
set(hiprt_compile_flags_sdk_bc
|
||||
${hiprt_compile_flags}
|
||||
${target_gpus}
|
||||
${HIP_HIPCC_FLAGS}
|
||||
${flags}
|
||||
-x hip
|
||||
${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h
|
||||
-D HIPRT_BITCODE_LINKING
|
||||
-std=c++17
|
||||
-fgpu-rdc
|
||||
-c
|
||||
--gpu-bundle-output
|
||||
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
|
||||
-emit-llvm
|
||||
-I ${HIPRT_INCLUDE_DIR}
|
||||
-Wno-parentheses-equality
|
||||
-Wno-unused-value
|
||||
-ffast-math
|
||||
-o ${sdk_bitcode_file})
|
||||
|
||||
set(hiprt_compile_flags_cycles_bc
|
||||
${hiprt_compile_flags}
|
||||
${target_gpus}
|
||||
${HIP_HIPCC_FLAGS}
|
||||
@@ -727,6 +859,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
|
||||
-fgpu-rdc
|
||||
-c
|
||||
--gpu-bundle-output
|
||||
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
|
||||
-emit-llvm
|
||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt
|
||||
@@ -734,45 +867,52 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
|
||||
-Wno-parentheses-equality
|
||||
-Wno-unused-value
|
||||
-ffast-math
|
||||
-o ${bitcode_file})
|
||||
-o ${cycles_bitcode_file})
|
||||
|
||||
if(WITH_NANOVDB)
|
||||
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_NANOVDB)
|
||||
endif()
|
||||
|
||||
if(WITH_CYCLES_DEBUG)
|
||||
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG)
|
||||
endif()
|
||||
add_custom_command(
|
||||
OUTPUT ${bitcode_file}
|
||||
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags}
|
||||
OUTPUT ${cycles_bitcode_file}
|
||||
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_cycles_bc}
|
||||
DEPENDS ${kernel_sources})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${sdk_bitcode_file}
|
||||
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_sdk_bc}
|
||||
DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h)
|
||||
|
||||
if(WIN32)
|
||||
set(hiprt_link_command ${CMAKE_COMMAND})
|
||||
set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
|
||||
${HIP_LINKER_EXECUTABLE})
|
||||
else()
|
||||
# not implemented yet
|
||||
set(hiprt_link_command ${HIP_LINKER_EXECUTABLE})
|
||||
set(hiprt_link_flags)
|
||||
endif()
|
||||
|
||||
set(hiprt_link_flags
|
||||
${hiprt_link_flags}
|
||||
${target_gpus}
|
||||
-fgpu-rdc
|
||||
--hip-link
|
||||
--cuda-device-only
|
||||
${bitcode_file}
|
||||
${HIPRT_BITCODE}
|
||||
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
|
||||
${cycles_bitcode_file}
|
||||
${sdk_bitcode_file}
|
||||
-o ${hiprt_file})
|
||||
add_custom_command(
|
||||
OUTPUT ${hiprt_file}
|
||||
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
|
||||
DEPENDS ${bitcode_file})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${hiprt_file}
|
||||
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
|
||||
DEPENDS ${cycles_bitcode_file} ${sdk_bitcode_file})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${hiprt_file_compressed}
|
||||
COMMAND "$<TARGET_FILE:zstd_compress>" ${hiprt_file} ${hiprt_file_compressed}
|
||||
DEPENDS ${hiprt_file})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed})
|
||||
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
|
||||
|
||||
endif()
|
||||
|
||||
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed} ${bvh_file} ${bvh_file_oro})
|
||||
cycles_set_solution_folder(cycles_kernel_hiprt)
|
||||
endif()
|
||||
|
||||
@@ -1151,23 +1291,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
DEPENDS ${cycles_oneapi_kernel_sources})
|
||||
endif()
|
||||
|
||||
if(NOT WITH_BLENDER)
|
||||
# For the Cycles standalone put libraries next to the Cycles application.
|
||||
set(cycles_oneapi_target_path ${CYCLES_INSTALL_PATH})
|
||||
else()
|
||||
# For Blender put the libraries next to the Blender executable.
|
||||
#
|
||||
# Note that the installation path in the delayed_install is relative to the versioned folder,
|
||||
# which means we need to go one level up.
|
||||
set(cycles_oneapi_target_path "../")
|
||||
endif()
|
||||
|
||||
# install dynamic libraries required at runtime
|
||||
if(WIN32)
|
||||
delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path})
|
||||
elseif(UNIX AND NOT APPLE)
|
||||
delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path}/lib)
|
||||
endif()
|
||||
delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_kernel_runtime_lib_target_path})
|
||||
|
||||
add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})
|
||||
endif()
|
||||
@@ -1287,6 +1412,10 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt)
|
||||
if(WITH_CYCLES_DEVICE_HIPRT)
|
||||
delayed_install(${HIPRT_INCLUDE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt/hiprt)
|
||||
delayed_install(${HIPRT_INCLUDE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_SDK}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt/hiprt/impl)
|
||||
endif()
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
|
||||
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)
|
||||
|
||||
@@ -125,7 +125,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
|
||||
// we don't need custom intersection functions for SSR
|
||||
# ifdef HIPRT_SHARED_STACK
|
||||
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal(local_geom,
|
||||
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
|
||||
ray_hip,
|
||||
stack,
|
||||
hiprtTraversalHintDefault,
|
||||
|
||||
@@ -44,36 +44,38 @@ struct LocalPayload {
|
||||
|
||||
# if defined(HIPRT_SHARED_STACK)
|
||||
# define GET_TRAVERSAL_STACK() \
|
||||
Stack stack(&kg->global_stack_buffer[0], \
|
||||
HIPRT_THREAD_STACK_SIZE, \
|
||||
kg->shared_stack, \
|
||||
HIPRT_SHARED_STACK_SIZE);
|
||||
Stack stack(kg->global_stack_buffer, kg->shared_stack); \
|
||||
Instance_Stack instance_stack;
|
||||
# else
|
||||
# define GET_TRAVERSAL_STACK()
|
||||
# endif
|
||||
|
||||
# ifdef HIPRT_SHARED_STACK
|
||||
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
|
||||
hiprtSceneTraversalAnyHitCustomStack<Stack> traversal(kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
stack, \
|
||||
visibility, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload, \
|
||||
kernel_params.FUNCTION_TABLE, \
|
||||
RAY_TYPE, \
|
||||
RAY_TIME);
|
||||
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
|
||||
(hiprtScene)kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
stack, \
|
||||
instance_stack, \
|
||||
visibility, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload, \
|
||||
kernel_params.FUNCTION_TABLE, \
|
||||
RAY_TYPE, \
|
||||
RAY_TIME);
|
||||
|
||||
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
|
||||
hiprtSceneTraversalClosestCustomStack<Stack> traversal(kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
stack, \
|
||||
visibility, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload, \
|
||||
kernel_params.FUNCTION_TABLE, \
|
||||
RAY_TYPE, \
|
||||
RAY_TIME);
|
||||
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
|
||||
(hiprtScene)kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
stack, \
|
||||
instance_stack, \
|
||||
visibility, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload, \
|
||||
kernel_params.FUNCTION_TABLE, \
|
||||
RAY_TYPE, \
|
||||
RAY_TIME);
|
||||
# else
|
||||
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \
|
||||
hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \
|
||||
@@ -654,14 +656,14 @@ ccl_device_inline bool volume_intersection_filter(const hiprtRay &ray,
|
||||
return false;
|
||||
}
|
||||
|
||||
HIPRT_DEVICE bool intersectFunc(u32 geomType,
|
||||
u32 rayType,
|
||||
HIPRT_DEVICE bool intersectFunc(uint geomType,
|
||||
uint rayType,
|
||||
const hiprtFuncTableHeader &tableHeader,
|
||||
const hiprtRay &ray,
|
||||
void *payload,
|
||||
hiprtHit &hit)
|
||||
{
|
||||
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
|
||||
const uint index = tableHeader.numGeomTypes * rayType + geomType;
|
||||
const void *data = tableHeader.funcDataSets[index].filterFuncData;
|
||||
switch (index) {
|
||||
case Curve_Intersect_Function:
|
||||
@@ -683,14 +685,14 @@ HIPRT_DEVICE bool intersectFunc(u32 geomType,
|
||||
return false;
|
||||
}
|
||||
|
||||
HIPRT_DEVICE bool filterFunc(u32 geomType,
|
||||
u32 rayType,
|
||||
HIPRT_DEVICE bool filterFunc(uint geomType,
|
||||
uint rayType,
|
||||
const hiprtFuncTableHeader &tableHeader,
|
||||
const hiprtRay &ray,
|
||||
void *payload,
|
||||
const hiprtHit &hit)
|
||||
{
|
||||
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
|
||||
const uint index = tableHeader.numGeomTypes * rayType + geomType;
|
||||
const void *data = tableHeader.funcDataSets[index].intersectFuncData;
|
||||
switch (index) {
|
||||
case Triangle_Filter_Closest:
|
||||
|
||||
@@ -31,9 +31,9 @@
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct KernelGlobalsGPU {
|
||||
int *global_stack_buffer;
|
||||
hiprtGlobalStackBuffer global_stack_buffer;
|
||||
#ifdef HIPRT_SHARED_STACK
|
||||
int *shared_stack;
|
||||
hiprtSharedStackBuffer shared_stack;
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -47,7 +47,8 @@ typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
|
||||
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
|
||||
ccl_global KernelGlobalsGPU kg_gpu; \
|
||||
KernelGlobals kg = &kg_gpu; \
|
||||
kg->shared_stack = &shared_stack[0]; \
|
||||
kg->shared_stack.stackData = &shared_stack[0]; \
|
||||
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
|
||||
kg->global_stack_buffer = stack_buffer;
|
||||
#else
|
||||
# define HIPRT_INIT_KERNEL_GLOBAL() \
|
||||
@@ -146,6 +147,7 @@ __constant__ KernelParamsHIPRT kernel_params;
|
||||
|
||||
# ifdef HIPRT_SHARED_STACK
|
||||
typedef hiprtGlobalStack Stack;
|
||||
typedef hiprtEmptyInstanceStack Instance_Stack;
|
||||
# endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -9,7 +9,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
||||
ccl_global const int *path_index_array,
|
||||
ccl_global float *render_buffer,
|
||||
const int work_size,
|
||||
ccl_global int *stack_buffer)
|
||||
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
|
||||
@@ -25,7 +25,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
||||
ccl_gpu_kernel_signature(integrator_intersect_shadow,
|
||||
ccl_global const int *path_index_array,
|
||||
const int work_size,
|
||||
ccl_global int *stack_buffer)
|
||||
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
|
||||
@@ -41,7 +41,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
||||
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
|
||||
ccl_global const int *path_index_array,
|
||||
const int work_size,
|
||||
ccl_global int *stack_buffer)
|
||||
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
|
||||
@@ -57,7 +57,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
||||
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
|
||||
ccl_global const int *path_index_array,
|
||||
const int work_size,
|
||||
ccl_global int *stack_buffer)
|
||||
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
|
||||
@@ -72,7 +72,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
||||
ccl_gpu_kernel_signature(integrator_intersect_dedicated_light,
|
||||
ccl_global const int *path_index_array,
|
||||
const int work_size,
|
||||
ccl_global int *stack_buffer)
|
||||
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
|
||||
@@ -89,7 +89,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
||||
ccl_global const int *path_index_array,
|
||||
ccl_global float *render_buffer,
|
||||
const int work_size,
|
||||
ccl_global int *stack_buffer)
|
||||
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
if (global_index < work_size) {
|
||||
@@ -104,7 +104,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
||||
ccl_global const int *path_index_array,
|
||||
ccl_global float *render_buffer,
|
||||
const int work_size,
|
||||
ccl_global int *stack_buffer)
|
||||
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
||||
{
|
||||
const int global_index = ccl_gpu_global_id_x();
|
||||
if (global_index < work_size) {
|
||||
|
||||
Submodule lib/linux_x64 updated: 15d135d101...2b125e847c
Submodule lib/windows_x64 updated: bb7ae0e107...efa049df4c
@@ -1899,6 +1899,17 @@ if(WIN32)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
if(WITH_CYCLES_DEVICE_HIPRT)
|
||||
if(EXISTS ${LIBDIR}/hiprt/bin/hiprt64.dll)
|
||||
install(
|
||||
FILES ${LIBDIR}/hiprt/bin/hiprt64.dll
|
||||
DESTINATION "./"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# `vcpkg` substitutes our libraries with theirs, which will cause issues when you run
|
||||
# these builds on other systems due to missing DLL's. So we opt out the use of `vcpkg`.
|
||||
if(WIN32)
|
||||
|
||||
Reference in New Issue
Block a user