Files
test2/intern/cycles/device/hiprt/device_impl.cpp
Sergey Sharybin f89728a5e4 Fix: HIP-RT creates copy of vector<Object *> during build
Is harmless from functional perspective, but uses more resources and
potentially slower than it should be. Although, probably something
hard to measure in practice, but still better not follow this anti-
pattern.

Pull Request: https://projects.blender.org/blender/blender/pulls/135529
2025-03-06 11:57:51 +01:00

1115 lines
38 KiB
C++

/* SPDX-FileCopyrightText: 2011-2023 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#ifdef WITH_HIPRT
# include "device/hiprt/device_impl.h"
# include "kernel/device/hiprt/globals.h"
# include "util/log.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/progress.h"
# include "util/string.h"
# include "util/time.h"
# include "util/types.h"
# ifdef _WIN32
# include "util/windows.h"
# endif
# include "bvh/hiprt.h"
# include "scene/hair.h"
# include "scene/mesh.h"
# include "scene/object.h"
# include "scene/pointcloud.h"
CCL_NAMESPACE_BEGIN
static void get_hiprt_transform(float matrix[][4], Transform &tfm)
{
int row = 0;
int col = 0;
matrix[row][col++] = tfm.x.x;
matrix[row][col++] = tfm.x.y;
matrix[row][col++] = tfm.x.z;
matrix[row][col++] = tfm.x.w;
row++;
col = 0;
matrix[row][col++] = tfm.y.x;
matrix[row][col++] = tfm.y.y;
matrix[row][col++] = tfm.y.z;
matrix[row][col++] = tfm.y.w;
row++;
col = 0;
matrix[row][col++] = tfm.z.x;
matrix[row][col++] = tfm.z.y;
matrix[row][col++] = tfm.z.z;
matrix[row][col++] = tfm.z.w;
}
class HIPRTDevice;
BVHLayoutMask HIPRTDevice::get_bvh_layout_mask(const uint /* kernel_features */) const
{
return BVH_LAYOUT_HIPRT;
}
HIPRTDevice::HIPRTDevice(const DeviceInfo &info,
Stats &stats,
Profiler &profiler,
const bool headless)
: HIPDevice(info, stats, profiler, headless),
hiprt_context(nullptr),
scene(nullptr),
functions_table(nullptr),
scratch_buffer_size(0),
scratch_buffer(this, "scratch_buffer", MEM_DEVICE_ONLY),
prim_visibility(this, "prim_visibility", MEM_GLOBAL),
instance_transform_matrix(this, "instance_transform_matrix", MEM_READ_ONLY),
transform_headers(this, "transform_headers", MEM_READ_ONLY),
user_instance_id(this, "user_instance_id", MEM_GLOBAL),
hiprt_blas_ptr(this, "hiprt_blas_ptr", MEM_READ_WRITE),
blas_ptr(this, "blas_ptr", MEM_GLOBAL),
custom_prim_info(this, "custom_prim_info", MEM_GLOBAL),
custom_prim_info_offset(this, "custom_prim_info_offset", MEM_GLOBAL),
prims_time(this, "prims_time", MEM_GLOBAL),
prim_time_offset(this, "prim_time_offset", MEM_GLOBAL)
{
HIPContextScope scope(this);
global_stack_buffer = {0};
hiprtContextCreationInput hiprt_context_input = {nullptr};
hiprt_context_input.ctxt = hipContext;
hiprt_context_input.device = hipDevice;
hiprt_context_input.deviceType = hiprtDeviceAMD;
hiprtError rt_result = hiprtCreateContext(
HIPRT_API_VERSION, hiprt_context_input, &hiprt_context);
if (rt_result != hiprtSuccess) {
set_error(string_printf("Failed to create HIPRT context"));
return;
}
rt_result = hiprtCreateFuncTable(
hiprt_context, Max_Primitive_Type, Max_Intersect_Filter_Function, functions_table);
if (rt_result != hiprtSuccess) {
set_error(string_printf("Failed to create HIPRT Function Table"));
return;
}
hiprtSetLogLevel(hiprtLogLevelNone);
}
HIPRTDevice::~HIPRTDevice()
{
HIPContextScope scope(this);
user_instance_id.free();
prim_visibility.free();
hiprt_blas_ptr.free();
blas_ptr.free();
instance_transform_matrix.free();
transform_headers.free();
custom_prim_info_offset.free();
custom_prim_info.free();
prim_time_offset.free();
prims_time.free();
hiprtDestroyGlobalStackBuffer(hiprt_context, global_stack_buffer);
hiprtDestroyFuncTable(hiprt_context, functions_table);
hiprtDestroyScene(hiprt_context, scene);
hiprtDestroyContext(hiprt_context);
}
unique_ptr<DeviceQueue> HIPRTDevice::gpu_queue_create()
{
return make_unique<HIPRTDeviceQueue>(this);
}
string HIPRTDevice::compile_kernel_get_common_cflags(const uint kernel_features)
{
string cflags = HIPDevice::compile_kernel_get_common_cflags(kernel_features);
cflags += " -D __HIPRT__ ";
return cflags;
}
string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const char *base)
{
int major, minor;
hipDeviceGetAttribute(&major, hipDeviceAttributeComputeCapabilityMajor, hipDevId);
hipDeviceGetAttribute(&minor, hipDeviceAttributeComputeCapabilityMinor, hipDevId);
const std::string arch = hipDeviceArch(hipDevId);
if (!use_adaptive_compilation()) {
const string fatbin = path_get(string_printf("lib/%s_rt_%s.hipfb.zst", name, arch.c_str()));
VLOG(1) << "Testing for pre-compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using precompiled kernel.";
return fatbin;
}
}
string source_path = path_get("source");
const string source_md5 = path_files_md5_hash(source_path);
string common_cflags = compile_kernel_get_common_cflags(kernel_features);
const string kernel_md5 = util_md5_string(source_md5 + common_cflags);
const string include_path = source_path;
const string fatbin_file = string_printf(
"cycles_%s_%s_%s.hipfb", name, arch.c_str(), kernel_md5.c_str());
const string fatbin = path_cache_get(path_join("kernels", fatbin_file));
const string hiprt_include_path = path_join(source_path, "kernel/device/hiprt");
VLOG(1) << "Testing for locally compiled kernel " << fatbin << ".";
if (path_exists(fatbin)) {
VLOG(1) << "Using locally compiled kernel.";
return fatbin;
}
# ifdef _WIN32
if (!use_adaptive_compilation() && have_precompiled_kernels()) {
if (!hipSupportsDevice(hipDevId)) {
set_error(
string_printf("HIP backend requires compute capability 10.1 or up, but found %d.%d. "
"Your GPU is not supported.",
major,
minor));
}
else {
set_error(
string_printf("HIP binary kernel for this graphics card compute "
"capability (%d.%d) not found.",
major,
minor));
}
return string();
}
# endif
const char *const hipcc = hipewCompilerPath();
if (hipcc == nullptr) {
set_error(
"HIP hipcc compiler not found. "
"Install HIP toolkit in default location.");
return string();
}
const int hipcc_hip_version = hipewCompilerVersion();
VLOG_INFO << "Found hipcc " << hipcc << ", HIP version " << hipcc_hip_version << ".";
if (hipcc_hip_version < 40) {
printf(
"Unsupported HIP version %d.%d detected, "
"you need HIP 4.0 or newer.\n",
hipcc_hip_version / 10,
hipcc_hip_version % 10);
return string();
}
path_create_directories(fatbin);
source_path = path_join(path_join(source_path, "kernel"),
path_join("device", path_join(base, string_printf("%s.cpp", name))));
const char *const kernel_ext = "genco";
string options;
options.append(
"-Wno-parentheses-equality -Wno-unused-value -ffast-math -O3 -std=c++17 -D __HIPRT__");
options.append(" --offload-arch=").append(arch.c_str());
# ifdef WITH_NANOVDB
options.append(" -D WITH_NANOVDB");
# endif
printf("Compiling %s and caching to %s", source_path.c_str(), fatbin.c_str());
double starttime = time_dt();
string compile_command = string_printf("%s %s -I %s -I %s --%s %s -o \"%s\"",
hipcc,
options.c_str(),
include_path.c_str(),
hiprt_include_path.c_str(),
kernel_ext,
source_path.c_str(),
fatbin.c_str());
# ifdef _WIN32
compile_command = "call " + compile_command;
# endif
if (system(compile_command.c_str()) != 0) {
set_error(
"Failed to execute linking command, "
"see console for details.");
return string();
}
printf("Kernel compilation finished in %.2lfs.\n", time_dt() - starttime);
return fatbin;
}
bool HIPRTDevice::load_kernels(const uint kernel_features)
{
if (hipModule) {
if (use_adaptive_compilation()) {
VLOG(1) << "Skipping HIP kernel reload for adaptive compilation, not currently supported.";
}
return true;
}
if (hipContext == nullptr) {
return false;
}
if (!support_device(kernel_features)) {
return false;
}
/* Keep track of whether motion blur is enabled, so to enable/disable motion in BVH builds
* This is necessary since objects may be reported to have motion if the Vector pass is
* active, but may still need to be rendered without motion blur if that isn't active as well.
*/
use_motion_blur |= kernel_features & KERNEL_FEATURE_OBJECT_MOTION;
/* get kernel */
const char *kernel_name = "kernel";
string fatbin = compile_kernel(kernel_features, kernel_name);
if (fatbin.empty()) {
return false;
}
/* open module */
HIPContextScope scope(this);
string fatbin_data;
hipError_t result;
if (path_read_compressed_text(fatbin, fatbin_data)) {
result = hipModuleLoadData(&hipModule, fatbin_data.c_str());
}
else {
result = hipErrorFileNotFound;
}
if (result != hipSuccess) {
set_error(string_printf(
"Failed to load HIP kernel from '%s' (%s)", fatbin.c_str(), hipewErrorString(result)));
}
if (result == hipSuccess) {
kernels.load(this);
{
const DeviceKernel test_kernel = (kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE :
(kernel_features & KERNEL_FEATURE_MNEE) ?
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE :
DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE;
HIPRTDeviceQueue queue(this);
device_ptr d_path_index = 0;
device_ptr d_render_buffer = 0;
int d_work_size = 0;
DeviceKernelArguments args(&d_path_index, &d_render_buffer, &d_work_size);
queue.init_execution();
queue.enqueue(test_kernel, 1, args);
queue.synchronize();
}
}
return (result == hipSuccess);
}
void HIPRTDevice::const_copy_to(const char *name, void *host, const size_t size)
{
HIPContextScope scope(this);
hipDeviceptr_t mem;
size_t bytes;
if (strcmp(name, "data") == 0) {
assert(size <= sizeof(KernelData));
KernelData *const data = (KernelData *)host;
*(hiprtScene *)&data->device_bvh = scene;
}
hip_assert(hipModuleGetGlobal(&mem, &bytes, hipModule, "kernel_params"));
assert(bytes == sizeof(KernelParamsHIPRT));
# define KERNEL_DATA_ARRAY(data_type, data_name) \
if (strcmp(name, #data_name) == 0) { \
hip_assert(hipMemcpyHtoD(mem + offsetof(KernelParamsHIPRT, data_name), host, size)); \
return; \
}
KERNEL_DATA_ARRAY(KernelData, data)
KERNEL_DATA_ARRAY(IntegratorStateGPU, integrator_state)
KERNEL_DATA_ARRAY(int, user_instance_id)
KERNEL_DATA_ARRAY(uint64_t, blas_ptr)
KERNEL_DATA_ARRAY(int2, custom_prim_info_offset)
KERNEL_DATA_ARRAY(int2, custom_prim_info)
KERNEL_DATA_ARRAY(int, prim_time_offset)
KERNEL_DATA_ARRAY(float2, prims_time)
# include "kernel/data_arrays.h"
# undef KERNEL_DATA_ARRAY
}
hiprtGeometryBuildInput HIPRTDevice::prepare_triangle_blas(BVHHIPRT *bvh, Mesh *mesh)
{
hiprtGeometryBuildInput geom_input;
geom_input.geomType = Triangle;
if (use_motion_blur && mesh->has_motion_blur()) {
const Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
const float3 *vert_steps = attr_mP->data_float3();
const size_t num_verts = mesh->get_verts().size();
const size_t num_steps = mesh->get_motion_steps();
const size_t num_triangles = mesh->num_triangles();
const float3 *verts = mesh->get_verts().data();
int num_bounds = 0;
if (bvh->params.num_motion_triangle_steps == 0 || bvh->params.use_spatial_split) {
bvh->custom_primitive_bound.alloc(num_triangles);
bvh->custom_prim_info.resize(num_triangles);
for (uint j = 0; j < num_triangles; j++) {
Mesh::Triangle t = mesh->get_triangle(j);
BoundBox bounds = BoundBox::empty;
t.bounds_grow(verts, bounds);
for (size_t step = 0; step < num_steps - 1; step++) {
t.bounds_grow(vert_steps + step * num_verts, bounds);
}
if (bounds.valid()) {
bvh->custom_primitive_bound[num_bounds] = bounds;
bvh->custom_prim_info[num_bounds].x = j;
bvh->custom_prim_info[num_bounds].y = mesh->primitive_type();
num_bounds++;
}
}
}
else {
const int num_bvh_steps = bvh->params.num_motion_triangle_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
bvh->custom_primitive_bound.alloc(num_triangles * num_bvh_steps);
bvh->custom_prim_info.resize(num_triangles * num_bvh_steps);
bvh->prims_time.resize(num_triangles * num_bvh_steps);
for (uint j = 0; j < num_triangles; j++) {
Mesh::Triangle t = mesh->get_triangle(j);
float3 prev_verts[3];
t.motion_verts(verts, vert_steps, num_verts, num_steps, 0.0f, prev_verts);
BoundBox prev_bounds = BoundBox::empty;
prev_bounds.grow(prev_verts[0]);
prev_bounds.grow(prev_verts[1]);
prev_bounds.grow(prev_verts[2]);
for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
float3 curr_verts[3];
t.motion_verts(verts, vert_steps, num_verts, num_steps, curr_time, curr_verts);
BoundBox curr_bounds = BoundBox::empty;
curr_bounds.grow(curr_verts[0]);
curr_bounds.grow(curr_verts[1]);
curr_bounds.grow(curr_verts[2]);
BoundBox bounds = prev_bounds;
bounds.grow(curr_bounds);
if (bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
bvh->custom_primitive_bound[num_bounds] = bounds;
bvh->custom_prim_info[num_bounds].x = j;
bvh->custom_prim_info[num_bounds].y = mesh->primitive_type();
bvh->prims_time[num_bounds].x = curr_time;
bvh->prims_time[num_bounds].y = prev_time;
num_bounds++;
}
prev_bounds = curr_bounds;
}
}
}
bvh->custom_prim_aabb.aabbCount = num_bounds;
bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox);
bvh->custom_primitive_bound.copy_to_device();
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
geom_input.geomType = Motion_Triangle;
}
else {
size_t triangle_size = mesh->get_triangles().size();
void *triangle_data = mesh->get_triangles().data();
size_t vertex_size = mesh->get_verts().size();
void *vertex_data = mesh->get_verts().data();
bvh->triangle_mesh.triangleCount = mesh->num_triangles();
bvh->triangle_mesh.triangleStride = 3 * sizeof(int);
bvh->triangle_mesh.vertexCount = vertex_size;
bvh->triangle_mesh.vertexStride = sizeof(float3);
bvh->triangle_index.host_pointer = triangle_data;
bvh->triangle_index.data_elements = 1;
bvh->triangle_index.data_type = TYPE_INT;
bvh->triangle_index.data_size = triangle_size;
bvh->triangle_index.copy_to_device();
bvh->triangle_mesh.triangleIndices = (void *)(bvh->triangle_index.device_pointer);
// either has to set the host pointer to zero, or increment the refcount on triangle_data
bvh->triangle_index.host_pointer = nullptr;
bvh->vertex_data.host_pointer = vertex_data;
bvh->vertex_data.data_elements = 4;
bvh->vertex_data.data_type = TYPE_FLOAT;
bvh->vertex_data.data_size = vertex_size;
bvh->vertex_data.copy_to_device();
bvh->triangle_mesh.vertices = (void *)(bvh->vertex_data.device_pointer);
bvh->vertex_data.host_pointer = nullptr;
geom_input.type = hiprtPrimitiveTypeTriangleMesh;
geom_input.primitive.triangleMesh = bvh->triangle_mesh;
}
return geom_input;
}
hiprtGeometryBuildInput HIPRTDevice::prepare_curve_blas(BVHHIPRT *bvh, Hair *hair)
{
hiprtGeometryBuildInput geom_input;
const PrimitiveType primitive_type = hair->primitive_type();
const size_t num_curves = hair->num_curves();
const size_t num_segments = hair->num_segments();
const Attribute *curve_attr_mP = nullptr;
if (use_motion_blur && hair->has_motion_blur()) {
curve_attr_mP = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
}
if (curve_attr_mP == nullptr || bvh->params.num_motion_curve_steps == 0) {
bvh->custom_prim_info.resize(num_segments);
bvh->custom_primitive_bound.alloc(num_segments);
}
else {
size_t num_boxes = bvh->params.num_motion_curve_steps * 2 * num_segments;
bvh->custom_prim_info.resize(num_boxes);
bvh->prims_time.resize(num_boxes);
bvh->custom_primitive_bound.alloc(num_boxes);
}
int num_bounds = 0;
float3 *curve_keys = hair->get_curve_keys().data();
for (uint j = 0; j < num_curves; j++) {
const Hair::Curve curve = hair->get_curve(j);
const float *curve_radius = hair->get_curve_radius().data();
int first_key = curve.first_key;
for (int k = 0; k < curve.num_keys - 1; k++) {
if (curve_attr_mP == nullptr) {
float3 current_keys[4];
current_keys[0] = curve_keys[max(first_key + k - 1, first_key)];
current_keys[1] = curve_keys[first_key + k];
current_keys[2] = curve_keys[first_key + k + 1];
current_keys[3] = curve_keys[min(first_key + k + 2, first_key + curve.num_keys - 1)];
if (current_keys[0].x == current_keys[1].x && current_keys[1].x == current_keys[2].x &&
current_keys[2].x == current_keys[3].x && current_keys[0].y == current_keys[1].y &&
current_keys[1].y == current_keys[2].y && current_keys[2].y == current_keys[3].y &&
current_keys[0].z == current_keys[1].z && current_keys[1].z == current_keys[2].z &&
current_keys[2].z == current_keys[3].z)
{
continue;
}
BoundBox bounds = BoundBox::empty;
curve.bounds_grow(k, hair->get_curve_keys().data(), curve_radius, bounds);
if (bounds.valid()) {
int type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
bvh->custom_prim_info[num_bounds].x = j;
bvh->custom_prim_info[num_bounds].y = type;
bvh->custom_primitive_bound[num_bounds] = bounds;
num_bounds++;
}
}
else {
const size_t num_steps = hair->get_motion_steps();
const float4 *key_steps = curve_attr_mP->data_float4();
const size_t num_keys = hair->get_curve_keys().size();
if (bvh->params.num_motion_curve_steps == 0 || bvh->params.use_spatial_split) {
BoundBox bounds = BoundBox::empty;
curve.bounds_grow(k, hair->get_curve_keys().data(), curve_radius, bounds);
for (size_t step = 0; step < num_steps - 1; step++) {
curve.bounds_grow(k, key_steps + step * num_keys, bounds);
}
if (bounds.valid()) {
int type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
bvh->custom_prim_info[num_bounds].x = j;
bvh->custom_prim_info[num_bounds].y = type;
bvh->custom_primitive_bound[num_bounds] = bounds;
num_bounds++;
}
}
else {
const int num_bvh_steps = bvh->params.num_motion_curve_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
float4 prev_keys[4];
curve.cardinal_motion_keys(curve_keys,
curve_radius,
key_steps,
num_keys,
num_steps,
0.0f,
k - 1,
k,
k + 1,
k + 2,
prev_keys);
BoundBox prev_bounds = BoundBox::empty;
curve.bounds_grow(prev_keys, prev_bounds);
for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
float4 curr_keys[4];
curve.cardinal_motion_keys(curve_keys,
curve_radius,
key_steps,
num_keys,
num_steps,
curr_time,
k - 1,
k,
k + 1,
k + 2,
curr_keys);
BoundBox curr_bounds = BoundBox::empty;
curve.bounds_grow(curr_keys, curr_bounds);
BoundBox bounds = prev_bounds;
bounds.grow(curr_bounds);
if (bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
int packed_type = PRIMITIVE_PACK_SEGMENT(primitive_type, k);
bvh->custom_prim_info[num_bounds].x = j;
bvh->custom_prim_info[num_bounds].y = packed_type; // k
bvh->custom_primitive_bound[num_bounds] = bounds;
bvh->prims_time[num_bounds].x = prev_time;
bvh->prims_time[num_bounds].y = curr_time;
num_bounds++;
}
prev_bounds = curr_bounds;
}
}
}
}
}
bvh->custom_prim_aabb.aabbCount = num_bounds;
bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox);
bvh->custom_primitive_bound.copy_to_device();
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
geom_input.geomType = Curve;
return geom_input;
}
hiprtGeometryBuildInput HIPRTDevice::prepare_point_blas(BVHHIPRT *bvh, PointCloud *pointcloud)
{
hiprtGeometryBuildInput geom_input;
const Attribute *point_attr_mP = nullptr;
if (use_motion_blur && pointcloud->has_motion_blur()) {
point_attr_mP = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
}
const float3 *points_data = pointcloud->get_points().data();
const float *radius_data = pointcloud->get_radius().data();
const size_t num_points = pointcloud->num_points();
const float4 *motion_data = (point_attr_mP) ? point_attr_mP->data_float4() : nullptr;
const size_t num_steps = pointcloud->get_motion_steps();
int num_bounds = 0;
if (point_attr_mP == nullptr) {
bvh->custom_prim_info.resize(num_points);
bvh->custom_primitive_bound.alloc(num_points);
for (uint j = 0; j < num_points; j++) {
const PointCloud::Point point = pointcloud->get_point(j);
BoundBox bounds = BoundBox::empty;
point.bounds_grow(points_data, radius_data, bounds);
if (bounds.valid()) {
bvh->custom_primitive_bound[num_bounds] = bounds;
bvh->custom_prim_info[num_bounds].x = j;
bvh->custom_prim_info[num_bounds].y = PRIMITIVE_POINT;
num_bounds++;
}
}
}
else if (bvh->params.num_motion_point_steps == 0 || bvh->params.use_spatial_split) {
bvh->custom_prim_info.resize(num_points);
bvh->custom_primitive_bound.alloc(num_points);
for (uint j = 0; j < num_points; j++) {
const PointCloud::Point point = pointcloud->get_point(j);
BoundBox bounds = BoundBox::empty;
point.bounds_grow(points_data, radius_data, bounds);
for (size_t step = 0; step < num_steps - 1; step++) {
point.bounds_grow(motion_data[step * num_points + j], bounds);
}
if (bounds.valid()) {
bvh->custom_primitive_bound[num_bounds] = bounds;
bvh->custom_prim_info[num_bounds].x = j;
bvh->custom_prim_info[num_bounds].y = PRIMITIVE_MOTION_POINT;
num_bounds++;
}
}
}
else {
const int num_bvh_steps = bvh->params.num_motion_point_steps * 2 + 1;
const float num_bvh_steps_inv_1 = 1.0f / (num_bvh_steps - 1);
bvh->custom_prim_info.resize(num_points * num_bvh_steps);
bvh->custom_primitive_bound.alloc(num_points * num_bvh_steps);
bvh->prims_time.resize(num_points * num_bvh_steps);
for (uint j = 0; j < num_points; j++) {
const PointCloud::Point point = pointcloud->get_point(j);
const size_t num_steps = pointcloud->get_motion_steps();
const float4 *point_steps = point_attr_mP->data_float4();
float4 prev_key = point.motion_key(
points_data, radius_data, point_steps, num_points, num_steps, 0.0f, j);
BoundBox prev_bounds = BoundBox::empty;
point.bounds_grow(prev_key, prev_bounds);
for (int bvh_step = 1; bvh_step < num_bvh_steps; ++bvh_step) {
const float curr_time = (float)(bvh_step)*num_bvh_steps_inv_1;
float4 curr_key = point.motion_key(
points_data, radius_data, point_steps, num_points, num_steps, curr_time, j);
BoundBox curr_bounds = BoundBox::empty;
point.bounds_grow(curr_key, curr_bounds);
BoundBox bounds = prev_bounds;
bounds.grow(curr_bounds);
if (bounds.valid()) {
const float prev_time = (float)(bvh_step - 1) * num_bvh_steps_inv_1;
bvh->custom_primitive_bound[num_bounds] = bounds;
bvh->custom_prim_info[num_bounds].x = j;
bvh->custom_prim_info[num_bounds].y = PRIMITIVE_MOTION_POINT;
bvh->prims_time[num_bounds].x = prev_time;
bvh->prims_time[num_bounds].y = curr_time;
num_bounds++;
}
prev_bounds = curr_bounds;
}
}
}
bvh->custom_prim_aabb.aabbCount = num_bounds;
bvh->custom_prim_aabb.aabbStride = sizeof(BoundBox);
bvh->custom_primitive_bound.copy_to_device();
bvh->custom_prim_aabb.aabbs = (void *)bvh->custom_primitive_bound.device_pointer;
geom_input.type = hiprtPrimitiveTypeAABBList;
geom_input.primitive.aabbList = bvh->custom_prim_aabb;
geom_input.geomType = Point;
return geom_input;
}
void HIPRTDevice::build_blas(BVHHIPRT *bvh, Geometry *geom, hiprtBuildOptions options)
{
hiprtGeometryBuildInput geom_input = {};
switch (geom->geometry_type) {
case Geometry::MESH:
case Geometry::VOLUME: {
Mesh *mesh = static_cast<Mesh *>(geom);
if (mesh->num_triangles() == 0) {
return;
}
geom_input = prepare_triangle_blas(bvh, mesh);
break;
}
case Geometry::HAIR: {
Hair *const hair = static_cast<Hair *const>(geom);
if (hair->num_segments() == 0) {
return;
}
geom_input = prepare_curve_blas(bvh, hair);
break;
}
case Geometry::POINTCLOUD: {
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
if (pointcloud->num_points() == 0) {
return;
}
geom_input = prepare_point_blas(bvh, pointcloud);
break;
}
default:
assert(geom_input.geomType != hiprtInvalidValue);
}
size_t blas_scratch_buffer_size = 0;
hiprtError rt_err = hiprtGetGeometryBuildTemporaryBufferSize(
hiprt_context, geom_input, options, blas_scratch_buffer_size);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to get scratch buffer size for BLAS!"));
}
rt_err = hiprtCreateGeometry(hiprt_context, geom_input, options, bvh->hiprt_geom);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to create BLAS!"));
}
{
thread_scoped_lock lock(hiprt_mutex);
if (blas_scratch_buffer_size > scratch_buffer_size) {
scratch_buffer.alloc(blas_scratch_buffer_size);
scratch_buffer.zero_to_device();
if (!scratch_buffer.device_pointer) {
hiprtDestroyGeometry(hiprt_context, bvh->hiprt_geom);
bvh->hiprt_geom = nullptr;
return;
}
scratch_buffer_size = blas_scratch_buffer_size;
}
bvh->geom_input = geom_input;
rt_err = hiprtBuildGeometry(hiprt_context,
hiprtBuildOperationBuild,
bvh->geom_input,
options,
(void *)(scratch_buffer.device_pointer),
nullptr,
bvh->hiprt_geom);
}
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to build BLAS"));
}
}
hiprtScene HIPRTDevice::build_tlas(BVHHIPRT *bvh,
const vector<Object *> &objects,
hiprtBuildOptions options,
bool refit)
{
size_t num_object = objects.size();
if (num_object == 0) {
return nullptr;
}
hiprtBuildOperation build_operation = refit ? hiprtBuildOperationUpdate :
hiprtBuildOperationBuild;
array<hiprtFrameMatrix> transform_matrix;
unordered_map<Geometry *, int2> prim_info_map;
size_t custom_prim_offset = 0;
unordered_map<Geometry *, int> prim_time_map;
size_t num_instances = 0;
int blender_instance_id = 0;
user_instance_id.alloc(num_object);
prim_visibility.alloc(num_object);
hiprt_blas_ptr.alloc(num_object);
blas_ptr.alloc(num_object);
transform_headers.alloc(num_object);
custom_prim_info_offset.alloc(num_object);
prim_time_offset.alloc(num_object);
for (Object *ob : objects) {
uint32_t mask = 0;
if (ob->is_traceable()) {
mask = ob->visibility_for_tracing();
}
Transform current_transform = ob->get_tfm();
Geometry *geom = ob->get_geometry();
bool transform_applied = geom->transform_applied;
BVHHIPRT *current_bvh = static_cast<BVHHIPRT *>(geom->bvh.get());
bool is_valid_geometry = current_bvh->geom_input.geomType != hiprtInvalidValue;
hiprtGeometry hiprt_geom_current = current_bvh->hiprt_geom;
hiprtFrameMatrix hiprt_transform_matrix = {{{0}}};
Transform identity_matrix = transform_identity();
get_hiprt_transform(hiprt_transform_matrix.matrix, identity_matrix);
if (is_valid_geometry) {
bool is_custom_prim = current_bvh->custom_prim_info.size() > 0;
if (is_custom_prim) {
bool has_motion_blur = current_bvh->prims_time.size() > 0;
unordered_map<Geometry *, int2>::iterator it = prim_info_map.find(geom);
if (prim_info_map.find(geom) != prim_info_map.end()) {
custom_prim_info_offset[blender_instance_id] = it->second;
if (has_motion_blur) {
prim_time_offset[blender_instance_id] = prim_time_map[geom];
}
}
else {
int offset = bvh->custom_prim_info.size();
prim_info_map[geom].x = offset;
prim_info_map[geom].y = custom_prim_offset;
bvh->custom_prim_info.resize(offset + current_bvh->custom_prim_info.size());
memcpy(bvh->custom_prim_info.data() + offset,
current_bvh->custom_prim_info.data(),
current_bvh->custom_prim_info.size() * sizeof(int2));
custom_prim_info_offset[blender_instance_id].x = offset;
custom_prim_info_offset[blender_instance_id].y = custom_prim_offset;
if (geom->is_hair()) {
custom_prim_offset += ((Hair *)geom)->num_curves();
}
else if (geom->is_pointcloud()) {
custom_prim_offset += ((PointCloud *)geom)->num_points();
}
else {
custom_prim_offset += ((Mesh *)geom)->num_triangles();
}
if (has_motion_blur) {
int time_offset = bvh->prims_time.size();
prim_time_map[geom] = time_offset;
bvh->prims_time.resize(time_offset + current_bvh->prims_time.size());
memcpy(bvh->prims_time.data() + time_offset,
current_bvh->prims_time.data(),
current_bvh->prims_time.size() * sizeof(float2));
prim_time_offset[blender_instance_id] = time_offset;
}
else {
prim_time_offset[blender_instance_id] = -1;
}
}
}
else {
custom_prim_info_offset[blender_instance_id] = {-1, -1};
}
hiprtTransformHeader current_header = {0};
current_header.frameCount = 1;
current_header.frameIndex = transform_matrix.size();
if (use_motion_blur && ob->get_motion().size()) {
int motion_size = ob->get_motion().size();
assert(motion_size != 1);
array<Transform> tfm_array = ob->get_motion();
float time_iternval = 1 / (float)(motion_size - 1);
current_header.frameCount = motion_size;
vector<hiprtFrameMatrix> tfm_hiprt_mb;
tfm_hiprt_mb.resize(motion_size);
for (int i = 0; i < motion_size; i++) {
get_hiprt_transform(tfm_hiprt_mb[i].matrix, tfm_array[i]);
tfm_hiprt_mb[i].time = (float)i * time_iternval;
transform_matrix.push_back_slow(tfm_hiprt_mb[i]);
}
}
else {
if (transform_applied) {
current_transform = identity_matrix;
}
get_hiprt_transform(hiprt_transform_matrix.matrix, current_transform);
transform_matrix.push_back_slow(hiprt_transform_matrix);
}
transform_headers[num_instances] = current_header;
user_instance_id[num_instances] = blender_instance_id;
prim_visibility[num_instances] = mask;
hiprt_blas_ptr[num_instances].geometry = hiprt_geom_current;
hiprt_blas_ptr[num_instances].type = hiprtInstanceTypeGeometry;
num_instances++;
}
blas_ptr[blender_instance_id] = (uint64_t)hiprt_geom_current;
blender_instance_id++;
}
size_t table_ptr_size = 0;
hipDeviceptr_t table_device_ptr;
hip_assert(hipModuleGetGlobal(&table_device_ptr, &table_ptr_size, hipModule, "kernel_params"));
size_t kernel_param_offset[4];
int table_index = 0;
kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_closest_intersect);
kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_shadow_intersect);
kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_local_intersect);
kernel_param_offset[table_index++] = offsetof(KernelParamsHIPRT, table_volume_intersect);
for (int index = 0; index < table_index; index++) {
hip_assert(hipMemcpyHtoD(table_device_ptr + kernel_param_offset[index],
(void *)&functions_table,
sizeof(device_ptr)));
}
if (num_instances == 0)
return nullptr;
int frame_count = transform_matrix.size();
hiprtSceneBuildInput scene_input_ptr = {nullptr};
scene_input_ptr.instanceCount = num_instances;
scene_input_ptr.frameCount = frame_count;
scene_input_ptr.frameType = hiprtFrameTypeMatrix;
user_instance_id.copy_to_device();
prim_visibility.copy_to_device();
hiprt_blas_ptr.copy_to_device();
blas_ptr.copy_to_device();
transform_headers.copy_to_device();
{
if (instance_transform_matrix.data_size != frame_count) {
assert(!instance_transform_matrix.host_pointer);
instance_transform_matrix.host_and_device_free();
}
instance_transform_matrix.host_pointer = transform_matrix.data();
instance_transform_matrix.data_elements = sizeof(hiprtFrameMatrix);
instance_transform_matrix.data_type = TYPE_UCHAR;
instance_transform_matrix.data_size = frame_count;
instance_transform_matrix.data_width = frame_count;
instance_transform_matrix.data_height = 0;
instance_transform_matrix.data_depth = 0;
instance_transform_matrix.copy_to_device();
instance_transform_matrix.host_pointer = nullptr;
}
scene_input_ptr.instanceMasks = (void *)prim_visibility.device_pointer;
scene_input_ptr.instances = (void *)hiprt_blas_ptr.device_pointer;
scene_input_ptr.instanceTransformHeaders = (void *)transform_headers.device_pointer;
scene_input_ptr.instanceFrames = (void *)instance_transform_matrix.device_pointer;
hiprtScene scene = nullptr;
hiprtError rt_err = hiprtCreateScene(hiprt_context, scene_input_ptr, options, scene);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to create TLAS"));
}
size_t tlas_scratch_buffer_size;
rt_err = hiprtGetSceneBuildTemporaryBufferSize(
hiprt_context, scene_input_ptr, options, tlas_scratch_buffer_size);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to get scratch buffer size for TLAS"));
}
if (tlas_scratch_buffer_size > scratch_buffer_size) {
scratch_buffer.alloc(tlas_scratch_buffer_size);
scratch_buffer.zero_to_device();
}
rt_err = hiprtBuildScene(hiprt_context,
build_operation,
scene_input_ptr,
options,
(void *)scratch_buffer.device_pointer,
nullptr,
scene);
if (rt_err != hiprtSuccess) {
set_error(string_printf("Failed to build TLAS"));
}
scratch_buffer.free();
scratch_buffer_size = 0;
if (bvh->custom_prim_info.size()) {
size_t data_size = bvh->custom_prim_info.size();
if (custom_prim_info.data_size != data_size) {
assert(!custom_prim_info.host_pointer);
custom_prim_info.host_and_device_free();
}
custom_prim_info.host_pointer = bvh->custom_prim_info.data();
custom_prim_info.data_elements = 2;
custom_prim_info.data_type = TYPE_INT;
custom_prim_info.data_size = data_size;
custom_prim_info.data_width = data_size;
custom_prim_info.data_height = 0;
custom_prim_info.data_depth = 0;
custom_prim_info.copy_to_device();
custom_prim_info.host_pointer = nullptr;
custom_prim_info_offset.copy_to_device();
}
if (bvh->prims_time.size()) {
size_t data_size = bvh->prims_time.size();
if (prims_time.data_size != data_size) {
assert(!prims_time.host_pointer);
prims_time.host_and_device_free();
}
prims_time.host_pointer = bvh->prims_time.data();
prims_time.data_elements = 2;
prims_time.data_type = TYPE_FLOAT;
prims_time.data_size = data_size;
prims_time.data_width = data_size;
prims_time.data_height = 0;
prims_time.data_depth = 0;
prims_time.copy_to_device();
prims_time.host_pointer = nullptr;
prim_time_offset.copy_to_device();
}
return scene;
}
void HIPRTDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
{
progress.set_substatus("Building HIPRT acceleration structure");
hiprtBuildOptions options;
options.buildFlags = hiprtBuildFlagBitPreferHighQualityBuild;
BVHHIPRT *bvh_rt = static_cast<BVHHIPRT *>(bvh);
HIPContextScope scope(this);
if (!bvh_rt->is_tlas()) {
const vector<Geometry *> &geometry = bvh_rt->geometry;
assert(geometry.size() == 1);
build_blas(bvh_rt, geometry[0], options);
}
else {
if (scene) {
hiprtDestroyScene(hiprt_context, scene);
}
scene = build_tlas(bvh_rt, bvh_rt->objects, options, refit);
}
}
CCL_NAMESPACE_END
#endif