Cycles: Linux Support for HIP-RT

This change switches Cycles to an open-source HIP-RT library which
implements hardware ray-tracing. This library is now used on
both Windows and Linux. While there should be no noticeable changes
on Windows, on Linux this adds support for hardware ray-tracing on
AMD GPUs.

The majority of the change is typical platform code to add a new
library to the dependency builder, and a change in the way
ahead-of-time (AoT) kernels are compiled. There are changes in
Cycles itself, but they are rather straightforward: some APIs
changed in the open-source version of the library.

There are a couple of extra files which are needed for this to
work: hiprt02003_6.1_amd.hipfb and oro_compiled_kernels.hipfb.
The HIP-RT library makes some assumptions about where these files
are located. Currently they follow the same rule as AoT
kernels for oneAPI:
- On Windows they are next to blender.exe
- On Linux they are in the lib/ folder

Performance comparison on Ubuntu 22.04.5:
```
GPU: AMD Radeon PRO W7800
Driver: amdgpu-install_6.1.60103-1_all.deb
                       main         hip-rt
attic                  0.1414s      0.0932s
barbershop_interior    0.1563s      0.1258s
bistro                 0.2134s      0.1597s
bmw27                  0.0119s      0.0099s
classroom              0.1006s      0.0803s
fishy_cat              0.0248s      0.0178s
junkshop               0.0916s      0.0713s
koro                   0.0589s      0.0720s
monster                0.0435s      0.0385s
pabellon               0.0543s      0.0391s
sponza                 0.0223s      0.0180s
spring                 0.1026s      1.5145s
victor                 0.1901s      0.1239s
wdas_cloud             0.1153s      0.1125s
```

Co-authored-by: Brecht Van Lommel <brecht@blender.org>
Co-authored-by: Ray Molenkamp <github@lazydodo.com>
Co-authored-by: Sergey Sharybin <sergey@blender.org>

Pull Request: https://projects.blender.org/blender/blender/pulls/121050
This commit is contained in:
Sahar A. Kashi
2024-09-24 14:35:24 +02:00
committed by Sergey Sharybin
parent dfa25d3f05
commit 26ed4d3892
24 changed files with 463 additions and 193 deletions

View File

@@ -719,11 +719,8 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
mark_as_advanced(WITH_CYCLES_DEVICE_HIP)
mark_as_advanced(CYCLES_HIP_BINARIES_ARCH)
# HIPRT is only available on Windows for now.
if(WIN32)
option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT)
endif()
option(WITH_CYCLES_DEVICE_HIPRT "Enable Cycles AMD HIPRT support" OFF)
mark_as_advanced(WITH_CYCLES_DEVICE_HIPRT)
endif()
# Apple Metal
@@ -2825,8 +2822,6 @@ if(FIRST_RUN)
info_cfg_option(WITH_CYCLES_ONEAPI_BINARIES)
info_cfg_option(WITH_CYCLES_DEVICE_HIP)
info_cfg_option(WITH_CYCLES_HIP_BINARIES)
endif()
if(WIN32)
info_cfg_option(WITH_CYCLES_DEVICE_HIPRT)
endif()
endif()

View File

@@ -13,3 +13,5 @@ message(STATUS "Building in Rocky 8 Linux 64bit environment")
set(WITH_DOC_MANPAGE OFF CACHE BOOL "" FORCE)
set(WITH_CYCLES_TEST_OSL ON CACHE BOOL "" FORCE)
set(HIPRT_COMPILER_PARALLEL_JOBS 4 CACHE STRING "" FORCE)

View File

@@ -5,3 +5,5 @@
include("${CMAKE_CURRENT_LIST_DIR}/../../cmake/config/blender_release.cmake")
set(WITH_CYCLES_TEST_OSL ON CACHE BOOL "" FORCE)
set(HIPRT_COMPILER_PARALLEL_JOBS 4 CACHE STRING "" FORCE)

View File

@@ -37,18 +37,22 @@ find_program(HIP_HIPCC_EXECUTABLE
)
if(WIN32)
# Needed for HIP-RT on Windows.
find_program(HIP_LINKER_EXECUTABLE
NAMES
clang++
HINTS
${_hip_SEARCH_DIRS}
set(LINKER clang++)
else()
set(LINKER amdclang++)
endif()
find_program(HIP_LINKER_EXECUTABLE
NAMES
${LINKER}
HINTS
${_hip_SEARCH_DIRS}
PATH_SUFFIXES
bin
NO_DEFAULT_PATH
NO_CMAKE_PATH
)
endif()
)
if(HIP_HIPCC_EXECUTABLE)
set(HIP_VERSION_MAJOR 0)
@@ -95,6 +99,7 @@ if(HIP_HIPCC_EXECUTABLE)
# Construct full semantic version.
set(HIP_VERSION "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}.${HIP_VERSION_PATCH}")
set(HIP_VERSION_SHORT "${HIP_VERSION_MAJOR}.${HIP_VERSION_MINOR}")
unset(_hip_version_raw)
unset(_hipcc_executable)
endif()

View File

@@ -4,7 +4,6 @@
# Find HIPRT SDK. This module defines:
# HIPRT_INCLUDE_DIR, path to HIPRT include directory
# HIPRT_BITCODE, bitcode file with ray-tracing functionality
# HIPRT_FOUND, if SDK found
if(NOT (DEFINED HIPRT_ROOT_DIR))
@@ -23,36 +22,32 @@ endif()
set(_hiprt_SEARCH_DIRS
${HIPRT_ROOT_DIR}
/opt/lib/hiprt
)
find_path(HIPRT_INCLUDE_DIR
NAMES
hiprt/hiprt.h
HINTS
${_hiprt_SEARCH_DIRS}/include
${_hiprt_SEARCH_DIRS}
PATH_SUFFIXES
include
)
set(HIPRT_VERSION)
if(HIPRT_INCLUDE_DIR)
file(STRINGS "${HIPRT_INCLUDE_DIR}/hiprt/hiprt.h" _hiprt_version
REGEX "^#define HIPRT_VERSION_STR[ \t]\".*\"$")
string(REGEX MATCHALL "[0-9]+[.0-9]+" _hiprt_version ${_hiprt_version})
find_file(HIPRT_BITCODE
NAMES
hiprt${_hiprt_version}_amd_lib_win.bc
HINTS
${HIPRT_ROOT_DIR}/bin
${HIPRT_ROOT_DIR}/dist/bin/Release
NO_DEFAULT_PATH
)
unset(_hiprt_version)
string(REGEX MATCHALL "[0-9]+[.0-9]+" HIPRT_VERSION ${_hiprt_version})
endif()
unset(_hiprt_version)
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(HIPRT DEFAULT_MSG
HIPRT_INCLUDE_DIR HIPRT_BITCODE)
find_package_handle_standard_args(HIPRT
REQUIRED_VARS HIPRT_INCLUDE_DIR HIP_LINKER_EXECUTABLE
FAIL_MESSAGE "HIP-RT or one of its dependencies not found")
mark_as_advanced(
HIPRT_INCLUDE_DIR

View File

@@ -90,6 +90,7 @@ if(NOT APPLE)
# Can't use CMAKE_SYSTEM_PROCESSOR here as it's not set yet,
# so fall back to checking the env for vcvarsall's VSCMD_ARG_TGT_ARCH
if(NOT (WIN32 AND "$ENV{VSCMD_ARG_TGT_ARCH}" STREQUAL "arm64"))
set(WITH_CYCLES_DEVICE_HIPRT ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_DEVICE_OPTIX ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_CUDA_BINARIES ON CACHE BOOL "" FORCE)
set(WITH_CYCLES_HIP_BINARIES ON CACHE BOOL "" FORCE)
@@ -97,7 +98,3 @@ if(NOT APPLE)
set(WITH_CYCLES_ONEAPI_BINARIES ON CACHE BOOL "" FORCE)
endif()
endif()
if(WIN32 AND NOT (WIN32 AND "$ENV{VSCMD_ARG_TGT_ARCH}" STREQUAL "arm64"))
set(WITH_CYCLES_DEVICE_HIPRT ON CACHE BOOL "" FORCE)
endif()

View File

@@ -629,6 +629,8 @@ if(DEFINED LIBDIR)
without_system_libs_end()
endif()
add_bundled_libraries(hiprt/lib)
# ----------------------------------------------------------------------------
# Build and Link Flags

View File

@@ -20,63 +20,141 @@
#include <hiprt/hiprt_types.h>
#define HIPRT_MAJOR_VERSION 2
#define HIPRT_MINOR_VERSION 0
#define HIPRT_PATCH_VERSION 0xb68861
#define HIPRT_MINOR_VERSION 3
#define HIPRT_PATCH_VERSION 0x7df94af
#define HIPRT_API_VERSION 2000
#define HIPRT_VERSION_STR "02000"
#define HIPRT_API_VERSION 2003
#define HIPRT_VERSION_STR "02003"
#define HIP_VERSION_STR "6.0"
#ifdef _WIN32
#define HIPRTAPI __stdcall
#else
#define HIPRTAPI
#define HIP_CB
#endif
typedef unsigned int hiprtuint32_t;
/* Function types. */
typedef hiprtError(thiprtCreateContext)(hiprtuint32_t hiprtApiVersion,
hiprtContextCreationInput &input,
const hiprtContextCreationInput &input,
hiprtContext *outContext);
typedef hiprtError(thiprtDestroyContext)(hiprtContext context);
typedef hiprtError(thiprtCreateGeometry)(hiprtContext context,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtGeometry *outGeometry);
const hiprtGeometryBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
hiprtGeometry &outGeometry);
typedef hiprtError(thiprtDestroyGeometry)(hiprtContext context,
hiprtGeometry outGeometry);
typedef hiprtError(thiprtCreateGeometries)(hiprtContext context,
uint32_t numGeometries,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions buildOptions,
hiprtGeometry **outGeometries);
typedef hiprtError(thiprtDestroyGeometries)(hiprtContext context, uint32_t numGeometries,
hiprtGeometry* outGeometry);
typedef hiprtError(thiprtBuildGeometry)(hiprtContext context,
hiprtBuildOperation buildOperation,
const hiprtGeometryBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtGeometry outGeometry);
typedef hiprtError(thiprtBuildGeometries)(hiprtContext context,
uint32_t numGeometries,
hiprtBuildOperation buildOperation,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtGeometry outGeometry);
hiprtGeometry *outGeometries);
typedef hiprtError(thiprtGetGeometryBuildTemporaryBufferSize)(
hiprtContext context,
const hiprtGeometryBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
size_t &outSize);
typedef hiprtError(thiprtGetGeometriesBuildTemporaryBufferSize)(
hiprtContext context,
uint32_t numGeometries,
const hiprtGeometryBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
size_t *outSize);
size_t &outSize);
typedef hiprtError(thiprtCompactGeometry)( hiprtContext context, hiprtApiStream stream, hiprtGeometry geometryIn, hiprtGeometry& geometryOut);
typedef hiprtError(thiprtCompactGeometries)(
hiprtContext context,
uint32_t numGeometries,
hiprtApiStream stream,
hiprtGeometry* geometriesIn,
hiprtGeometry** geometriesOut );
typedef hiprtError(thiprtCreateScene)(hiprtContext context,
const hiprtSceneBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
hiprtScene &outScene);
typedef hiprtError(thiprtCreateScenes)(hiprtContext context,
uint32_t numScenes,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtScene *outScene);
const hiprtBuildOptions buildOptions,
hiprtScene **outScene);
typedef hiprtError(thiprtDestroyScene)(hiprtContext context, hiprtScene outScene);
typedef hiprtError(thiprtDestroyScenes)( hiprtContext context, uint32_t numScenes,hiprtScene *scene );
typedef hiprtError(thiprtBuildScene)(hiprtContext context,
hiprtBuildOperation buildOperation,
const hiprtSceneBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtScene outScene);
typedef hiprtError(thiprtBuildScenes)(hiprtContext context,
uint32_t numScenes,
hiprtBuildOperation buildOperation,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
hiprtDevicePtr temporaryBuffer,
hiprtApiStream stream,
hiprtScene outScene);
hiprtScene *outScene);
typedef hiprtError(thiprtGetSceneBuildTemporaryBufferSize)(
hiprtContext context,
const hiprtSceneBuildInput &buildInput,
const hiprtBuildOptions buildOptions,
size_t &outSize);
typedef hiprtError(thiprtGetScenesBuildTemporaryBufferSize)(
hiprtContext context,
uint32_t numScenes,
const hiprtSceneBuildInput *buildInput,
const hiprtBuildOptions *buildOptions,
size_t *outSize);
const hiprtBuildOptions buildOptions,
size_t &outSize);
typedef hiprtError(thiprtCompactScene)( hiprtContext context, hiprtApiStream stream, hiprtScene sceneIn, hiprtScene& sceneOut );
typedef hiprtError(thiprtCompactScenes)(
hiprtContext context, uint32_t numScenes, hiprtApiStream stream, hiprtScene* scenesIn, hiprtScene** scenesOut );
typedef hiprtError(thiprtCreateFuncTable)(hiprtContext context,
hiprtuint32_t numGeomTypes,
hiprtuint32_t numRayTypes,
hiprtFuncTable *outFuncTable);
hiprtFuncTable &outFuncTable);
typedef hiprtError(thiprtSetFuncTable)(hiprtContext context,
hiprtFuncTable funcTable,
hiprtuint32_t geomType,
hiprtuint32_t rayType,
hiprtFuncDataSet set);
typedef hiprtError (thiprtCreateGlobalStackBuffer)(hiprtContext context, const hiprtGlobalStackBufferInput& input, hiprtGlobalStackBuffer& stackBufferOut );
typedef hiprtError (thiprtDestroyGlobalStackBuffer)( hiprtContext context, hiprtGlobalStackBuffer stackBuffer );
typedef hiprtError(thiprtDestroyFuncTable)(hiprtContext context,
hiprtFuncTable funcTable);
typedef void(thiprtSetLogLevel)( hiprtLogLevel level );
@@ -94,6 +172,8 @@ extern thiprtBuildScene *hiprtBuildScene;
extern thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
extern thiprtCreateFuncTable *hiprtCreateFuncTable;
extern thiprtSetFuncTable *hiprtSetFuncTable;
extern thiprtCreateGlobalStackBuffer *hiprtCreateGlobalStackBuffer;
extern thiprtDestroyGlobalStackBuffer *hiprtDestroyGlobalStackBuffer;
extern thiprtDestroyFuncTable *hiprtDestroyFuncTable;
extern thiprtSetLogLevel *hiprtSetLogLevel;

View File

@@ -234,7 +234,6 @@ static int hipewHipInit(void) {
#ifdef _WIN32
/* Expected in C:/Windows/System32 or similar, no path needed. */
const char *hip_paths[] = {"amdhip64.dll", "amdhip64_6.dll", NULL};
#elif defined(__APPLE__)
/* Default installation path. */
const char *hip_paths[] = {"", NULL};

View File

@@ -40,6 +40,8 @@ thiprtBuildScene *hiprtBuildScene;
thiprtGetSceneBuildTemporaryBufferSize *hiprtGetSceneBuildTemporaryBufferSize;
thiprtCreateFuncTable *hiprtCreateFuncTable;
thiprtSetFuncTable *hiprtSetFuncTable;
thiprtCreateGlobalStackBuffer *hiprtCreateGlobalStackBuffer;
thiprtDestroyGlobalStackBuffer *hiprtDestroyGlobalStackBuffer;
thiprtDestroyFuncTable *hiprtDestroyFuncTable;
thiprtSetLogLevel *hiprtSetLogLevel;
@@ -61,15 +63,17 @@ bool hiprtewInit()
return result;
}
#ifdef _WIN32
initialized = true;
if (atexit(hipewHipRtExit)) {
return false;
}
std::string hiprt_ver(HIPRT_VERSION_STR);
std::string hiprt_path = "hiprt" + hiprt_ver + "64.dll";
#ifdef _WIN32
std::string hiprt_path = "hiprt64.dll";
#else
std::string hiprt_path = "libhiprt64.so";
#endif
hiprt_lib = dynamic_library_open(hiprt_path.c_str());
@@ -89,11 +93,12 @@ bool hiprtewInit()
HIPRT_LIBRARY_FIND(hiprtGetSceneBuildTemporaryBufferSize)
HIPRT_LIBRARY_FIND(hiprtCreateFuncTable)
HIPRT_LIBRARY_FIND(hiprtSetFuncTable)
HIPRT_LIBRARY_FIND(hiprtCreateGlobalStackBuffer)
HIPRT_LIBRARY_FIND(hiprtDestroyFuncTable)
HIPRT_LIBRARY_FIND(hiprtDestroyGlobalStackBuffer)
HIPRT_LIBRARY_FIND(hiprtSetLogLevel)
result = true;
#endif
return result;
}

View File

@@ -1831,10 +1831,9 @@ class CyclesPreferences(bpy.types.AddonPreferences):
if compute_device_type == 'HIP':
import platform
if platform.system() == "Windows": # HIP-RT is currently only supported on Windows
row = layout.row()
row.active = has_rt_api_support['HIP']
row.prop(self, "use_hiprt")
row = layout.row()
row.active = has_rt_api_support['HIP']
row.prop(self, "use_hiprt")
elif compute_device_type == 'ONEAPI' and _cycles.with_embree_gpu:
row = layout.row()

View File

@@ -43,7 +43,7 @@ endif()
###########################################################################
if(WITH_CYCLES_DEVICE_HIP)
if(WITH_CYCLES_HIP_BINARIES)
if(WITH_CYCLES_HIP_BINARIES OR WITH_CYCLES_DEVICE_HIPRT)
# Need at least HIP 5.5 to solve compiler bug affecting the kernel.
find_package(HIP 5.5.0)
set_and_warn_library_found("HIP compiler" HIP_FOUND WITH_CYCLES_HIP_BINARIES)
@@ -55,6 +55,9 @@ if(WITH_CYCLES_DEVICE_HIP)
# HIP RT
if(WITH_CYCLES_DEVICE_HIP AND WITH_CYCLES_DEVICE_HIPRT)
if(DEFINED LIBDIR)
set(HIPRT_ROOT_DIR ${LIBDIR}/hiprt)
endif()
find_package(HIPRT)
set_and_warn_library_found("HIP RT" HIPRT_FOUND WITH_CYCLES_DEVICE_HIPRT)
endif()

View File

@@ -59,7 +59,6 @@ BVHLayoutMask HIPRTDevice::get_bvh_layout_mask(const uint /* kernel_features */)
HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profiler, bool headless)
: HIPDevice(info, stats, profiler, headless),
global_stack_buffer(this, "global_stack_buffer", MEM_DEVICE_ONLY),
hiprt_context(NULL),
scene(NULL),
functions_table(NULL),
@@ -77,6 +76,7 @@ HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
prim_time_offset(this, "prim_time_offset", MEM_GLOBAL)
{
HIPContextScope scope(this);
global_stack_buffer = {0};
hiprtContextCreationInput hiprt_context_input = {0};
hiprt_context_input.ctxt = hipContext;
hiprt_context_input.device = hipDevice;
@@ -90,7 +90,7 @@ HIPRTDevice::HIPRTDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
}
rt_result = hiprtCreateFuncTable(
hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, &functions_table);
hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, functions_table);
if (rt_result != hiprtSuccess) {
set_error(string_printf("Failed to create HIPRT Function Table"));
@@ -113,7 +113,8 @@ HIPRTDevice::~HIPRTDevice()
custom_prim_info.free();
prim_time_offset.free();
prims_time.free();
global_stack_buffer.free();
hiprtDestroyGlobalStackBuffer(hiprt_context, global_stack_buffer);
hiprtDestroyFuncTable(hiprt_context, functions_table);
hiprtDestroyScene(hiprt_context, scene);
hiprtDestroyContext(hiprt_context);
@@ -156,12 +157,17 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
const string include_path = source_path;
const string bitcode_file = string_printf(
const string cycles_bc = string_printf(
"cycles_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str());
const string bitcode = path_cache_get(path_join("kernels", bitcode_file));
const string cycles_bitcode = path_cache_get(path_join("kernels", cycles_bc));
const string fatbin_file = string_printf(
"cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str());
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
const string hiprt_bc = string_printf(
"hiprt_%s_%s_%s.bc", name, arch.c_str(), kernel_md5.c_str());
const string hiprt_bitcode = path_cache_get(path_join("kernels", hiprt_bc));
const string hiprt_include_path = path_join(source_path, "kernel/device/hiprt");
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
@@ -210,6 +216,12 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
path_create_directories(fatbin);
string rtc_options;
rtc_options.append(" --offload-arch=").append(arch.c_str());
rtc_options.append(" -D __HIPRT__");
rtc_options.append(" -ffast-math -O3 -std=c++17");
rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm");
source_path = path_join(path_join(source_path, "kernel"),
path_join("device", path_join(base, string_printf("%s.cpp", name))));
@@ -217,25 +229,44 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
double starttime = time_dt();
const string hiprt_path = getenv("HIPRT_ROOT_DIR");
// First, app kernels are compiled into bitcode, without access to implementation of HIP RT
// functions
if (!path_exists(bitcode)) {
std::string rtc_options;
rtc_options.append(" --offload-arch=").append(arch.c_str());
rtc_options.append(" -D __HIPRT__");
rtc_options.append(" -ffast-math -O3 -std=c++17");
rtc_options.append(" -fgpu-rdc -c --gpu-bundle-output -c -emit-llvm");
if (!path_exists(cycles_bitcode)) {
string command = string_printf("%s %s -I %s -I %s %s -o \"%s\"",
hipcc,
rtc_options.c_str(),
include_path.c_str(),
hiprt_path.c_str(),
hiprt_include_path.c_str(),
source_path.c_str(),
bitcode.c_str());
cycles_bitcode.c_str());
printf("Compiling %sHIP kernel ...\n%s\n",
(use_adaptive_compilation()) ? "adaptive " : "",
command.c_str());
# ifdef _WIN32
command = "call " + command;
# endif
if (system(command.c_str()) != 0) {
set_error(
"Failed to execute compilation command, "
"see console for details.");
return string();
}
}
if (!path_exists(hiprt_bitcode)) {
rtc_options.append(" -x hip");
rtc_options.append(" -D HIPRT_BITCODE_LINKING ");
string source_path = path_join(hiprt_include_path, "/hiprt/impl/hiprt_kernels_bitcode.h");
string command = string_printf("%s %s -I %s %s -o \"%s\"",
hipcc,
rtc_options.c_str(),
hiprt_include_path.c_str(),
source_path.c_str(),
hiprt_bitcode.c_str());
printf("Compiling %sHIP kernel ...\n%s\n",
(use_adaptive_compilation()) ? "adaptive " : "",
@@ -257,13 +288,11 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
string linker_options;
linker_options.append(" --offload-arch=").append(arch.c_str());
linker_options.append(" -fgpu-rdc --hip-link --cuda-device-only ");
string hiprt_ver(HIPRT_VERSION_STR);
string hiprt_bc = hiprt_path + "\\dist\\bin\\Release\\hiprt" + hiprt_ver + "_amd_lib_win.bc";
string linker_command = string_printf("clang++ %s \"%s\" %s -o \"%s\"",
string linker_command = string_printf("clang++ %s \"%s\" \"%s\" -o \"%s\"",
linker_options.c_str(),
bitcode.c_str(),
hiprt_bc.c_str(),
cycles_bitcode.c_str(),
hiprt_bitcode.c_str(),
fatbin.c_str());
# ifdef _WIN32
@@ -458,7 +487,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
geom_input.geomType = Motion_Triangle;
}
else {
@@ -490,7 +519,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *
bvh->vertex_data.host_pointer = 0;
geom_input.type = hiprtPrimitiveTypeTriangleMesh;
geom_input.triangleMesh.primitive = &(bvh->triangle_mesh);
geom_input.primitive.triangleMesh = bvh->triangle_mesh;
}
return geom_input;
@@ -629,7 +658,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh, Hair *hai
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
geom_input.geomType = Curve;
return geom_input;
@@ -732,7 +761,7 @@ hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh, PointClou
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.aabbList.primitive = &bvh->custom_prim_aabb;
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
geom_input.geomType = Point;
return geom_input;
@@ -779,13 +808,13 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op
size_t blas_scratch_buffer_size = 0;
hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize(
hiprt_context, &geom_input, &options, &blas_scratch_buffer_size);
hiprt_context, geom_input, options, blas_scratch_buffer_size);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to get scratch buffer size for BLAS!"));
}
rt_err = hiprtCreateGeometry(hiprt_context, &geom_input, &options, &bvh->hiprt_geom);
rt_err = hiprtCreateGeometry(hiprt_context, geom_input, options, bvh->hiprt_geom);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to create BLAS!"));
@@ -800,8 +829,8 @@ void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions op
}
rt_err = hiprtBuildGeometry(hiprt_context,
hiprtBuildOperationBuild,
&bvh->geom_input,
&options,
bvh->geom_input,
options,
(void *)(scratch_buffer.device_pointer),
0,
bvh->hiprt_geom);
@@ -951,7 +980,8 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
user_instance_id[num_instances] = blender_instance_id;
prim_visibility[num_instances] = mask;
hiprt_blas_ptr[num_instances] = (uint64_t)hiprt_geom_current;
hiprt_blas_ptr[num_instances].geometry = hiprt_geom_current;
hiprt_blas_ptr[num_instances].type = hiprtInstanceTypeGeometry;
num_instances++;
}
blas_ptr[blender_instance_id] = (uint64_t)hiprt_geom_current;
@@ -981,13 +1011,13 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
}
scene_input_ptr.instanceMasks = (void *)prim_visibility.device_pointer;
scene_input_ptr.instanceGeometries = (void *)hiprt_blas_ptr.device_pointer;
scene_input_ptr.instances = (void *)hiprt_blas_ptr.device_pointer;
scene_input_ptr.instanceTransformHeaders = (void *)transform_headers.device_pointer;
scene_input_ptr.instanceFrames = (void *)instance_transform_matrix.device_pointer;
hiprtScene scene = 0;
hiprtError rt_err = hiprtCreateScene(hiprt_context, &scene_input_ptr, &options, &scene);
hiprtError rt_err = hiprtCreateScene(hiprt_context, scene_input_ptr, options, scene);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to create TLAS"));
@@ -995,7 +1025,7 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
size_t tlas_scratch_buffer_size;
rt_err = hiprtGetSceneBuildTemporaryBufferSize(
hiprt_context, &scene_input_ptr, &options, &tlas_scratch_buffer_size);
hiprt_context, scene_input_ptr, options, tlas_scratch_buffer_size);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to get scratch buffer size for TLAS"));
@@ -1008,8 +1038,8 @@ hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
rt_err = hiprtBuildScene(hiprt_context,
build_operation,
&scene_input_ptr,
&options,
scene_input_ptr,
options,
(void *)scratch_buffer.device_pointer,
0,
scene);

View File

@@ -53,7 +53,7 @@ class HIPRTDevice : public HIPDevice {
return hiprt_context;
}
device_vector<int> global_stack_buffer;
hiprtGlobalStackBuffer global_stack_buffer;
protected:
enum Filter_Function { Closest = 0, Shadows, Local, Volume, Max_Intersect_Filter_Function };
@@ -111,7 +111,7 @@ class HIPRTDevice : public HIPDevice {
* blas_ptr has all the valid pointers and null pointers and blas for any geometry can be
* directly retrieved from this array (used in subsurface scattering). */
device_vector<int> user_instance_id;
device_vector<uint64_t> hiprt_blas_ptr;
device_vector<hiprtInstance> hiprt_blas_ptr;
device_vector<uint64_t> blas_ptr;
/* custom_prim_info stores custom information for custom primitives for all the primitives in a

View File

@@ -34,14 +34,25 @@ bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
const HIPContextScope scope(hiprt_device_);
const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);
if (!hiprt_device_->global_stack_buffer.device_pointer) {
int max_path = num_concurrent_states(0);
hiprt_device_->global_stack_buffer.alloc(max_path * HIPRT_SHARED_STACK_SIZE * sizeof(int));
hiprt_device_->global_stack_buffer.zero_to_device();
if (!hiprt_device_->global_stack_buffer.stackData) {
uint32_t max_path = num_concurrent_states(0);
hiprtGlobalStackBufferInput stack_buffer_input{
hiprtStackTypeGlobal, hiprtStackEntryTypeInteger, HIPRT_THREAD_STACK_SIZE, max_path};
hiprtError rt_result = hiprtCreateGlobalStackBuffer(hiprt_device_->get_hiprt_context(),
stack_buffer_input,
hiprt_device_->global_stack_buffer);
if (rt_result != hiprtSuccess) {
LOG(ERROR) << "Failed to create hiprt Global Stack Buffer";
return false;
}
}
DeviceKernelArguments args_copy = args;
args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer);
args_copy.add(DeviceKernelArguments::HIPRT_GLOBAL_STACK,
(void *)(&hiprt_device_->global_stack_buffer),
sizeof(hiprtGlobalStackBuffer));
/* Compute kernel launch parameters. */
const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;

View File

@@ -28,6 +28,7 @@ struct DeviceKernelArguments {
INT32,
FLOAT32,
KERNEL_FILM_CONVERT,
HIPRT_GLOBAL_STACK,
};
static const int MAX_ARGS = 18;

View File

@@ -92,6 +92,28 @@ set(SRC_KERNEL_DEVICE_HIPRT_HEADERS
device/hiprt/hiprt_kernels.h
)
set(SRC_KERNEL_DEVICE_HIPRT_SDK
hiprt/impl/Aabb.h
hiprt/impl/BvhNode.h
hiprt/impl/Geometry.h
hiprt/impl/hiprt_device_impl.h
hiprt/impl/hiprt_kernels_bitcode.h
hiprt/impl/Instance.h
hiprt/impl/Math.h
hiprt/impl/QrDecomposition.h
hiprt/impl/Quaternion.h
hiprt/impl/Scene.h
hiprt/impl/Transform.h
hiprt/impl/Triangle.h
)
set(SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS
hiprt/hiprt_common.h
hiprt/hiprt_device.h
hiprt/hiprt_types.h
hiprt/hiprt_vec.h
)
set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
device/optix/bvh.h
device/optix/compat.h
@@ -422,6 +444,21 @@ add_executable(zstd_compress ../cmake/zstd_compress.cpp)
target_include_directories(zstd_compress SYSTEM PRIVATE ${ZSTD_INCLUDE_DIRS})
target_link_libraries(zstd_compress ${ZSTD_LIBRARIES} ${PTHREADS_LIBRARIES})
if(NOT WITH_BLENDER)
# For the Cycles standalone put libraries next to the Cycles application.
set(cycles_kernel_runtime_lib_target_path ${CYCLES_INSTALL_PATH})
else()
# For Blender put the libraries next to the Blender executable.
#
# Note that the installation path in the delayed_install is relative to the versioned folder,
# which means we need to go one level up.
set(cycles_kernel_runtime_lib_target_path "../")
endif()
if(UNIX AND NOT APPLE)
set(cycles_kernel_runtime_lib_target_path ${cycles_kernel_runtime_lib_target_path}/lib)
endif()
# CUDA module
if(WITH_CYCLES_CUDA_BINARIES)
@@ -689,17 +726,13 @@ endif()
# HIP RT module
if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
set(hiprt_sources device/hiprt/kernel.cpp
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
${SRC_UTIL_HEADERS})
set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
set(hiprt_file_compressed ${hiprt_file}.zst)
set(kernel_sources ${hiprt_sources})
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
if(WITH_CYCLES_DEVICE_HIPRT)
set(HIPRT_COMPILER_PARALLEL_JOBS 1 CACHE STRING "Number of parallel compiler instances to use for for HIP-RT kernels")
mark_as_advanced(HIPRT_COMPILER_PARALLEL_JOBS)
set(bvh_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd.hipfb)
set(bvh_file_oro ${CMAKE_CURRENT_BINARY_DIR}/oro_compiled_kernels.hipfb)
if(WIN32)
set(hiprt_compile_command ${CMAKE_COMMAND})
set(hiprt_compile_flags
@@ -713,7 +746,106 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
list(APPEND target_gpus "--offload-arch=${arch}")
endforeach()
set(hiprt_compile_flags
if(WITH_NANOVDB)
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_NANOVDB)
endif()
if(WITH_CYCLES_DEBUG)
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG)
endif()
set(hiprt_compile_flags_bvh
${hiprt_compile_flags}
${target_gpus}
${HIP_HIPCC_FLAGS}
-x hip
${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h
${flags}
-D HIPRT_BITCODE_LINKING
-std=c++17
-mllvm
-amdgpu-early-inline-all=false
-mllvm
-amdgpu-function-calls=true
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
--genco
-I ${HIPRT_INCLUDE_DIR}
-Wno-parentheses-equality
-Wno-unused-value
-ffast-math
-o ${bvh_file})
set(hiprt_compile_flags_bvh_oro
${hiprt_compile_flags}
${target_gpus}
${HIP_HIPCC_FLAGS}
-x hip
${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h
${flags}
-D HIPRT_BITCODE_LINKING
-std=c++17
-mllvm
-amdgpu-early-inline-all=false
-mllvm
-amdgpu-function-calls=true
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
--genco
-I ${HIPRT_INCLUDE_DIR}/contrib/Orochi
-include hip/hip_runtime.h
-Wno-parentheses-equality
-Wno-unused-value
-ffast-math
-o ${bvh_file_oro})
add_custom_command(
OUTPUT ${bvh_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh}
DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h)
add_custom_command(
OUTPUT ${bvh_file_oro}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh_oro}
DEPENDS ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h)
delayed_install("" "${bvh_file}" ${cycles_kernel_runtime_lib_target_path})
delayed_install("" "${bvh_file_oro}" ${cycles_kernel_runtime_lib_target_path})
if(WITH_CYCLES_HIP_BINARIES)
set(hiprt_sources device/hiprt/kernel.cpp
${SRC_KERNEL_HEADERS}
${SRC_KERNEL_DEVICE_GPU_HEADERS}
${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
${SRC_UTIL_HEADERS})
set(cycles_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
set(sdk_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd_lib.bc)
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
set(hiprt_file_compressed ${hiprt_file}.zst)
set(kernel_sources ${hiprt_sources})
set(hiprt_kernel_src "/device/hiprt/kernel.cpp")
set(hiprt_compile_flags_sdk_bc
${hiprt_compile_flags}
${target_gpus}
${HIP_HIPCC_FLAGS}
${flags}
-x hip
${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h
-D HIPRT_BITCODE_LINKING
-std=c++17
-fgpu-rdc
-c
--gpu-bundle-output
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
-emit-llvm
-I ${HIPRT_INCLUDE_DIR}
-Wno-parentheses-equality
-Wno-unused-value
-ffast-math
-o ${sdk_bitcode_file})
set(hiprt_compile_flags_cycles_bc
${hiprt_compile_flags}
${target_gpus}
${HIP_HIPCC_FLAGS}
@@ -727,6 +859,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
-fgpu-rdc
-c
--gpu-bundle-output
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
-emit-llvm
-I ${CMAKE_CURRENT_SOURCE_DIR}/..
-I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt
@@ -734,45 +867,52 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES)
-Wno-parentheses-equality
-Wno-unused-value
-ffast-math
-o ${bitcode_file})
-o ${cycles_bitcode_file})
# Optional feature defines appended to the base hiprt_compile_flags list.
# NOTE(review): hiprt_compile_flags_cycles_bc expands ${hiprt_compile_flags}
# before this point, and the Cycles bitcode rule passes
# hiprt_compile_flags_cycles_bc to the compiler. These appends therefore may
# never reach the compile command. Confirm, and if so either move them above
# that set() or append to hiprt_compile_flags_cycles_bc instead.
if(WITH_NANOVDB)
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_NANOVDB)
endif()
if(WITH_CYCLES_DEBUG)
set(hiprt_compile_flags ${hiprt_compile_flags} -D WITH_CYCLES_DEBUG)
endif()
add_custom_command(
OUTPUT ${bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags}
OUTPUT ${cycles_bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_cycles_bc}
DEPENDS ${kernel_sources})
# Rule producing the HIP-RT SDK bitcode; re-run whenever the SDK's
# hiprt_kernels_bitcode.h changes.
add_custom_command(
OUTPUT ${sdk_bitcode_file}
COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_sdk_bc}
DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h)
# Select how the HIP linker is invoked.
if(WIN32)
# Windows: run the linker through `cmake -E env` so that HIP_PATH is set to
# HIP_ROOT_DIR in the linker's environment.
set(hiprt_link_command ${CMAKE_COMMAND})
set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
${HIP_LINKER_EXECUTABLE})
else()
# Other platforms: invoke the linker executable directly and start from an
# empty flag list.
set(hiprt_link_command ${HIP_LINKER_EXECUTABLE})
set(hiprt_link_flags)
endif()
set(hiprt_link_flags
${hiprt_link_flags}
${target_gpus}
-fgpu-rdc
--hip-link
--cuda-device-only
${bitcode_file}
${HIPRT_BITCODE}
-parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
${cycles_bitcode_file}
${sdk_bitcode_file}
-o ${hiprt_file})
add_custom_command(
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${bitcode_file})
add_custom_command(
OUTPUT ${hiprt_file}
COMMAND ${hiprt_link_command} ${hiprt_link_flags}
DEPENDS ${cycles_bitcode_file} ${sdk_bitcode_file})
# Compress the linked kernel file with the zstd_compress tool target; the
# compressed file is re-generated whenever ${hiprt_file} changes.
add_custom_command(
OUTPUT ${hiprt_file_compressed}
COMMAND "$<TARGET_FILE:zstd_compress>" ${hiprt_file} ${hiprt_file_compressed}
DEPENDS ${hiprt_file})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
endif()
add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed} ${bvh_file} ${bvh_file_oro})
cycles_set_solution_folder(cycles_kernel_hiprt)
endif()
@@ -1151,23 +1291,8 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
DEPENDS ${cycles_oneapi_kernel_sources})
endif()
if(NOT WITH_BLENDER)
# For the Cycles standalone put libraries next to the Cycles application.
set(cycles_oneapi_target_path ${CYCLES_INSTALL_PATH})
else()
# For Blender put the libraries next to the Blender executable.
#
# Note that the installation path in the delayed_install is relative to the versioned folder,
# which means we need to go one level up.
set(cycles_oneapi_target_path "../")
endif()
# install dynamic libraries required at runtime
if(WIN32)
delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path})
elseif(UNIX AND NOT APPLE)
delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_oneapi_target_path}/lib)
endif()
delayed_install("" "${cycles_kernel_oneapi_lib}" ${cycles_kernel_runtime_lib_target_path})
add_custom_target(cycles_kernel_oneapi ALL DEPENDS ${cycles_kernel_oneapi_lib})
endif()
@@ -1287,6 +1412,10 @@ delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${CYCLES_
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt)
# When the HIP-RT device is enabled, also install the SDK file lists from
# HIPRT_INCLUDE_DIR below the installed Cycles HIP-RT kernel sources:
# the SDK headers into hiprt/ and the SDK sources into hiprt/impl/.
# NOTE(review): presumably needed so the kernels can be compiled from the
# installed sources at runtime — confirm.
if(WITH_CYCLES_DEVICE_HIPRT)
delayed_install(${HIPRT_INCLUDE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt/hiprt)
delayed_install(${HIPRT_INCLUDE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_SDK}" ${CYCLES_INSTALL_PATH}/source/kernel/device/hiprt/hiprt/impl)
endif()
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${CYCLES_INSTALL_PATH}/source/kernel/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${CYCLES_INSTALL_PATH}/source/kernel/device/metal)

View File

@@ -125,7 +125,7 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
// we don't need custom intersection functions for SSR
# ifdef HIPRT_SHARED_STACK
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal(local_geom,
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,

View File

@@ -44,36 +44,38 @@ struct LocalPayload {
# if defined(HIPRT_SHARED_STACK)
# define GET_TRAVERSAL_STACK() \
Stack stack(&kg->global_stack_buffer[0], \
HIPRT_THREAD_STACK_SIZE, \
kg->shared_stack, \
HIPRT_SHARED_STACK_SIZE);
Stack stack(kg->global_stack_buffer, kg->shared_stack); \
Instance_Stack instance_stack;
# else
# define GET_TRAVERSAL_STACK()
# endif
# ifdef HIPRT_SHARED_STACK
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
hiprtSceneTraversalAnyHitCustomStack<Stack> traversal(kernel_data.device_bvh, \
ray_hip, \
stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE, \
RAY_TIME);
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
(hiprtScene)kernel_data.device_bvh, \
ray_hip, \
stack, \
instance_stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE, \
RAY_TIME);
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
hiprtSceneTraversalClosestCustomStack<Stack> traversal(kernel_data.device_bvh, \
ray_hip, \
stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE, \
RAY_TIME);
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
(hiprtScene)kernel_data.device_bvh, \
ray_hip, \
stack, \
instance_stack, \
visibility, \
hiprtTraversalHintDefault, \
&payload, \
kernel_params.FUNCTION_TABLE, \
RAY_TYPE, \
RAY_TIME);
# else
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \
hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \
@@ -654,14 +656,14 @@ ccl_device_inline bool volume_intersection_filter(const hiprtRay &ray,
return false;
}
HIPRT_DEVICE bool intersectFunc(u32 geomType,
u32 rayType,
HIPRT_DEVICE bool intersectFunc(uint geomType,
uint rayType,
const hiprtFuncTableHeader &tableHeader,
const hiprtRay &ray,
void *payload,
hiprtHit &hit)
{
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
const uint index = tableHeader.numGeomTypes * rayType + geomType;
const void *data = tableHeader.funcDataSets[index].filterFuncData;
switch (index) {
case Curve_Intersect_Function:
@@ -683,14 +685,14 @@ HIPRT_DEVICE bool intersectFunc(u32 geomType,
return false;
}
HIPRT_DEVICE bool filterFunc(u32 geomType,
u32 rayType,
HIPRT_DEVICE bool filterFunc(uint geomType,
uint rayType,
const hiprtFuncTableHeader &tableHeader,
const hiprtRay &ray,
void *payload,
const hiprtHit &hit)
{
const u32 index = tableHeader.numGeomTypes * rayType + geomType;
const uint index = tableHeader.numGeomTypes * rayType + geomType;
const void *data = tableHeader.funcDataSets[index].intersectFuncData;
switch (index) {
case Triangle_Filter_Closest:

View File

@@ -31,9 +31,9 @@
CCL_NAMESPACE_BEGIN
struct KernelGlobalsGPU {
int *global_stack_buffer;
hiprtGlobalStackBuffer global_stack_buffer;
#ifdef HIPRT_SHARED_STACK
int *shared_stack;
hiprtSharedStackBuffer shared_stack;
#endif
};
@@ -47,7 +47,8 @@ typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
ccl_global KernelGlobalsGPU kg_gpu; \
KernelGlobals kg = &kg_gpu; \
kg->shared_stack = &shared_stack[0]; \
kg->shared_stack.stackData = &shared_stack[0]; \
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
kg->global_stack_buffer = stack_buffer;
#else
# define HIPRT_INIT_KERNEL_GLOBAL() \
@@ -146,6 +147,7 @@ __constant__ KernelParamsHIPRT kernel_params;
# ifdef HIPRT_SHARED_STACK
typedef hiprtGlobalStack Stack;
typedef hiprtEmptyInstanceStack Instance_Stack;
# endif
#endif

View File

@@ -9,7 +9,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@@ -25,7 +25,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_shadow,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@@ -41,7 +41,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@@ -57,7 +57,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@@ -72,7 +72,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_dedicated_light,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
@@ -89,7 +89,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
@@ -104,7 +104,7 @@ ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {

View File

@@ -1899,6 +1899,17 @@ if(WIN32)
endforeach()
endif()
# On Windows builds with the HIP-RT device enabled, copy the HIP-RT runtime DLL
# from the pre-built library directory into the root of the install prefix.
# The install is skipped silently when the DLL is not present in LIBDIR.
if(WIN32 AND WITH_CYCLES_DEVICE_HIPRT)
  if(EXISTS ${LIBDIR}/hiprt/bin/hiprt64.dll)
    install(FILES ${LIBDIR}/hiprt/bin/hiprt64.dll DESTINATION "./")
  endif()
endif()
# `vcpkg` substitutes our libraries with theirs, which will cause issues when you run
# these builds on other systems due to missing DLLs. So we opt out of using `vcpkg`.
if(WIN32)