Files
test2/intern/cycles/device/metal/bvh.mm
2025-04-03 11:44:25 -04:00

1399 lines
54 KiB
Plaintext

/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#ifdef WITH_METAL
# include <algorithm>
# include <chrono>
# include <thread>
# include <vector>
# include "scene/hair.h"
# include "scene/mesh.h"
# include "scene/object.h"
# include "scene/pointcloud.h"
# include "util/progress.h"
# include "device/metal/bvh.h"
# include "device/metal/util.h"
CCL_NAMESPACE_BEGIN
/* Report a BVH build status line.
 *
 * Formats the printf-style arguments with string_printf(), publishes the result as the
 * progress sub-status and echoes it through metal_printf().
 *
 * NOTE: the expansion is a bare brace block that refers to a variable named `progress`, so
 * one must be in scope wherever the macro is used. The block also declares a local named
 * `str`, which shadows any caller variable of the same name inside the argument list. */
# define BVH_status(...) \
{ \
string str = string_printf(__VA_ARGS__); \
progress.set_substatus(str); \
metal_printf("%s\n", str.c_str()); \
}
/* Uncomment the define below to enable diagnostic output from BVHMetalBuildThrottler. */
// # define BVH_THROTTLE_DIAGNOSTICS
# ifdef BVH_THROTTLE_DIAGNOSTICS
/* Diagnostics enabled: forward to printf(), prefixing the format string with
 * "BVHMetalBuildThrottler::". The first argument must therefore be a string literal. */
# define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
# else
/* Diagnostics disabled: the macro expands to nothing, so its arguments are never evaluated. */
# define bvh_throttle_printf(...)
# endif
/* Limit the number of concurrent BVH builds so that we don't approach unsafe GPU working set
 * sizes.
 *
 * Callers bracket each GPU build with acquire() / release(), passing the same byte estimate to
 * both. All counters are guarded by `mutex`; waiting is implemented by polling every 10 ms. */
struct BVHMetalBuildThrottler {
  thread_mutex mutex;
  /* Sum of the byte estimates of all requests that were acquired but not yet released. */
  size_t wired_memory = 0;
  /* Budget that `wired_memory` should stay below while more than one build is in flight. */
  size_t safe_wired_limit = 0;
  /* Number of requests that were acquired but not yet released. */
  int requests_in_flight = 0;

  BVHMetalBuildThrottler()
  {
    /* The default device will always be the one that supports MetalRT if the machine supports it.
     */
    id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();
    /* Set a conservative limit, but which will still only throttle in extreme cases. */
    safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
    /* This file uses manual retain/release and `MTLCreateSystemDefaultDevice()` hands back an
     * owned reference. The device is only needed for the query above, so balance it here
     * instead of leaking it. Messaging nil is a no-op if no device was found. */
    [mtlDevice release];
    bvh_throttle_printf("safe_wired_limit = %zu\n", safe_wired_limit);
  }

  /* Block until we're safely able to wire the requested resources.
   *
   * On return `bytes_to_be_wired` has been added to the running total and the request counts as
   * in flight; the caller must later pass the same value to release(). */
  void acquire(const size_t bytes_to_be_wired)
  {
    bool throttled = false;
    while (true) {
      {
        thread_scoped_lock lock(mutex);
        /* Always allow a BVH build to proceed if no other is in flight, otherwise
         * only proceed if we're within safe limits. */
        if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
          wired_memory += bytes_to_be_wired;
          requests_in_flight += 1;
          bvh_throttle_printf("acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
                              requests_in_flight,
                              wired_memory);
          return;
        }
        /* Only report the first failed attempt of this call to keep the log readable. */
        if (!throttled) {
          bvh_throttle_printf(
              "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
              "bytes_to_be_wired = %zu)\n",
              requests_in_flight,
              wired_memory,
              bytes_to_be_wired);
        }
        throttled = true;
      }
      /* The lock is dropped while sleeping so that release() can make progress. */
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }

  /* Notify of resources that have stopped being wired.
   * `bytes_just_unwired` must match the value passed to the corresponding acquire(). */
  void release(const size_t bytes_just_unwired)
  {
    thread_scoped_lock lock(mutex);
    wired_memory -= bytes_just_unwired;
    requests_in_flight -= 1;
    bvh_throttle_printf("release (requests_in_flight = %d, wired_memory = %zu)\n",
                        requests_in_flight,
                        wired_memory);
  }

  /* Wait for all outstanding work to finish.
   *
   * Completion is judged by the request counter rather than by the byte total, so a request
   * whose size estimate happened to be zero is still waited for. */
  void wait_for_all()
  {
    while (true) {
      {
        thread_scoped_lock lock(mutex);
        if (requests_in_flight == 0) {
          return;
        }
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }
} g_bvh_build_throttler;
/* macOS 15.2 and 15.3 have a bug in dynamic BVH refitting which leads to missing geometry
* during render. The issue is fixed in macOS 15.4; until then, refitting is disabled even for
* the viewport.
* Note that dynamic BVH is still used on the scene level to speed up updates of instances and
* such. See #132782. */
/* Tell whether BLAS refitting may be used on the running OS.
 * Returns false only on macOS versions from 15.2 up to (but not including) 15.4. */
static bool support_refit_blas()
{
  if (@available(macos 15.2, *)) {
    /* At least 15.2: refitting is only allowed again starting with 15.4. */
    if (@available(macos 15.4, *)) {
      return true;
    }
    return false;
  }
  /* Older than 15.2. */
  return true;
}
/* Construct a Metal BVH: forwards the parameters, geometry and objects to the BVH base class
 * and remembers the device pointer. No acceleration structure is created here. */
BVHMetal::BVHMetal(const BVHParams &params_,
                   const vector<Geometry *> &geometry_,
                   const vector<Object *> &objects_,
                   Device *device)
    : BVH(params_, geometry_, objects_),
      device(device)
{
}
/* Drop the acceleration structure held by this BVH (updating the device memory statistics via
 * set_accel_struct) and give up the reference to the null BLAS. */
BVHMetal::~BVHMetal()
{
  if (@available(macos 12.0, *)) {
    set_accel_struct(nil);
    /* Sending a message to nil is a no-op, so no explicit nil check is required. */
    [null_BLAS release];
  }
}
/* Replace the acceleration structure held by this BVH.
 *
 * The previous structure, if any, is subtracted from the device memory statistics and
 * released. A non-nil `new_accel_struct` is then stored (without an extra retain) and added
 * to the statistics. Passing nil simply clears the current structure. */
API_AVAILABLE(macos(11.0))
void BVHMetal::set_accel_struct(id<MTLAccelerationStructure> new_accel_struct)
{
  if (@available(macos 12.0, *)) {
    if (accel_struct != nil) {
      const size_t released_bytes = [accel_struct allocatedSize];
      device->stats.mem_free(released_bytes);
      [accel_struct release];
      accel_struct = nil;
    }

    if (new_accel_struct == nil) {
      return;
    }

    accel_struct = new_accel_struct;
    const size_t acquired_bytes = [accel_struct allocatedSize];
    device->stats.mem_alloc(acquired_bytes);
  }
}
/* Encode and submit a GPU build (or refit, when `refit` is true) of the bottom-level
 * acceleration structure for a triangle mesh.
 *
 * Returns false without submitting anything when the mesh has no triangles or when the
 * macOS 12.0 availability check fails. Otherwise the command buffer is committed and true is
 * returned; the resulting structure is installed through set_accel_struct() from a command
 * buffer completion handler, i.e. asynchronously with respect to this call. */
bool BVHMetal::build_BLAS_mesh(Progress &progress,
id<MTLDevice> mtl_device,
id<MTLCommandQueue> queue,
Geometry *const geom,
bool refit)
{
if (@available(macos 12.0, *)) {
/* Build BLAS for triangle primitives */
Mesh *const mesh = static_cast<Mesh *const>(geom);
if (mesh->num_triangles() == 0) {
return false;
}
/* Static BVHs, and systems where BLAS refitting is unsupported, take the compaction path
 * below instead of requesting a refittable, fast-to-build structure. */
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();
const array<float3> &verts = mesh->get_verts();
const array<int> &tris = mesh->get_triangles();
const size_t num_verts = verts.size();
const size_t num_indices = tris.size();
/* A single step unless motion blur is enabled and the motion attribute exists. */
size_t num_motion_steps = 1;
Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
num_motion_steps = mesh->get_motion_steps();
}
/* Upload the mesh data to the GPU */
id<MTLBuffer> posBuf = nil;
id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
length:num_indices * sizeof(tris.data()[0])
options:MTLResourceStorageModeShared];
if (num_motion_steps == 1) {
posBuf = [mtl_device newBufferWithBytes:verts.data()
length:num_verts * sizeof(verts.data()[0])
options:MTLResourceStorageModeShared];
}
else {
/* All motion steps are packed back to back into one buffer, num_verts entries per step. */
posBuf = [mtl_device
newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
options:MTLResourceStorageModeShared];
float3 *dest_data = (float3 *)[posBuf contents];
size_t center_step = (num_motion_steps - 1) / 2;
for (size_t step = 0; step < num_motion_steps; ++step) {
/* Note: this local `verts` pointer shadows the outer `verts` array reference. */
const float3 *verts = mesh->get_verts().data();
/* The center step for motion vertices is not stored in the attribute. */
if (step != center_step) {
verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
}
std::copy_n(verts, num_verts, dest_data + num_verts * step);
}
}
/* Create an acceleration structure. */
MTLAccelerationStructureGeometryDescriptor *geomDesc;
if (num_motion_steps > 1) {
/* One keyframe entry per motion step, each pointing at its slice of posBuf. */
std::vector<MTLMotionKeyframeData *> vertex_ptrs;
vertex_ptrs.reserve(num_motion_steps);
for (size_t step = 0; step < num_motion_steps; ++step) {
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
k.buffer = posBuf;
k.offset = num_verts * step * sizeof(float3);
vertex_ptrs.push_back(k);
}
MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
[MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
count:vertex_ptrs.size()];
geomDescMotion.vertexStride = sizeof(verts.data()[0]);
geomDescMotion.indexBuffer = indexBuf;
geomDescMotion.indexBufferOffset = 0;
geomDescMotion.indexType = MTLIndexTypeUInt32;
geomDescMotion.triangleCount = num_indices / 3;
geomDescMotion.intersectionFunctionTableOffset = 0;
geomDescMotion.opaque = true;
geomDesc = geomDescMotion;
BVH_status("Building motion mesh BLAS | %7d tris | %s | %7d motion keyframes",
(int)mesh->num_triangles(),
geom->name.c_str(),
(int)num_motion_steps);
}
else {
MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
[MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
geomDescNoMotion.vertexBuffer = posBuf;
geomDescNoMotion.vertexBufferOffset = 0;
geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
geomDescNoMotion.indexBuffer = indexBuf;
geomDescNoMotion.indexBufferOffset = 0;
geomDescNoMotion.indexType = MTLIndexTypeUInt32;
geomDescNoMotion.triangleCount = num_indices / 3;
geomDescNoMotion.intersectionFunctionTableOffset = 0;
geomDescNoMotion.opaque = true;
geomDesc = geomDescNoMotion;
BVH_status(
"Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
}
/* Force a single any-hit call, so shadow record-all behavior works correctly */
/* (Match optix behavior: unsigned int build_flags =
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
geomDesc.allowDuplicateIntersectionFunctionInvocation = false;
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
accelDesc.geometryDescriptors = @[ geomDesc ];
if (num_motion_steps > 1) {
accelDesc.motionStartTime = 0.0f;
accelDesc.motionEndTime = 1.0f;
accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
accelDesc.motionKeyframeCount = num_motion_steps;
}
accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
if (!use_fast_trace_bvh) {
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
MTLAccelerationStructureUsagePreferFastBuild);
}
/* Query the required sizes, then allocate the destination structure, the scratch space and
 * an 8-byte buffer that receives the compacted size. */
MTLAccelerationStructureSizes accelSizes = [mtl_device
accelerationStructureSizesWithDescriptor:accelDesc];
id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
options:MTLResourceStorageModePrivate];
id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
options:MTLResourceStorageModeShared];
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
if (refit) {
/* Refit from the currently held structure into the newly allocated one. */
[accelEnc refitAccelerationStructure:accel_struct
descriptor:accelDesc
destination:accel_uncompressed
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
else {
[accelEnc buildAccelerationStructure:accel_uncompressed
descriptor:accelDesc
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
if (use_fast_trace_bvh) {
/* Ask the GPU for the compacted size; it is read back in the completion handler. */
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
toBuffer:sizeBuf
offset:0
sizeDataType:MTLDataTypeULong];
}
[accelEnc endEncoding];
/* Estimated size of resources that will be wired for the GPU accelerated build.
* Acceleration-struct size is doubled to account for possible compaction step. */
size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
accel_uncompressed.allocatedSize * 2;
/* The handler below is registered before the command buffer is committed further down. */
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
/* free temp resources */
[scratchBuf release];
[indexBuf release];
[posBuf release];
if (use_fast_trace_bvh) {
/* Compact the accel structure */
/* The size is copied out by value, so the nested block does not touch sizeBuf. */
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
/* The compaction pass is encoded from a global dispatch queue rather than from inside
 * this completion handler. */
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
id<MTLAccelerationStructure> accel = [mtl_device
newAccelerationStructureWithSize:compressed_size];
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
/* Install the compacted structure and drop the uncompacted one. */
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
}];
[accelCommands commit];
});
}
else {
/* set our acceleration structure to the uncompressed structure */
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
}
[sizeBuf release];
}];
/* Wait until it's safe to proceed with GPU acceleration struct build. */
g_bvh_build_throttler.acquire(wired_size);
[accelCommands commit];
return true;
}
return false;
}
/* Encode and submit a GPU build (or refit, when `refit` is true) of the bottom-level
 * acceleration structure for hair curves.
 *
 * Returns false without submitting anything when the file is compiled without
 * MAC_OS_VERSION_14_0, when the macOS 14.0 availability check fails, or when the hair has no
 * curves. Otherwise the command buffer is committed and true is returned; the resulting
 * structure is installed through set_accel_struct() from a command buffer completion handler,
 * i.e. asynchronously with respect to this call. */
bool BVHMetal::build_BLAS_hair(Progress &progress,
id<MTLDevice> mtl_device,
id<MTLCommandQueue> queue,
Geometry *const geom,
bool refit)
{
# if defined(MAC_OS_VERSION_14_0)
if (@available(macos 14.0, *)) {
/* Build BLAS for hair curves */
Hair *hair = static_cast<Hair *>(geom);
if (hair->num_curves() == 0) {
return false;
}
/* Static BVHs, and systems where BLAS refitting is unsupported, take the compaction path
 * below instead of requesting a refittable, fast-to-build structure. */
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();
/* A single step unless motion blur is enabled and the motion attribute exists. */
size_t num_motion_steps = 1;
Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
num_motion_steps = hair->get_motion_steps();
}
id<MTLBuffer> cpBuffer = nil;
id<MTLBuffer> radiusBuffer = nil;
id<MTLBuffer> idxBuffer = nil;
MTLAccelerationStructureGeometryDescriptor *geomDesc;
if (num_motion_steps > 1) {
MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
[MTLAccelerationStructureMotionCurveGeometryDescriptor descriptor];
uint64_t numKeys = hair->num_keys();
uint64_t numCurves = hair->num_curves();
const array<float> &radiuses = hair->get_curve_radius();
/* Gather the curve geometry. */
std::vector<float3> cpData;
std::vector<int> idxData;
std::vector<float> radiusData;
cpData.reserve(numKeys);
radiusData.reserve(numKeys);
/* Start of each motion step's control points inside cpData / radiusData. */
std::vector<int> step_offsets;
for (size_t step = 0; step < num_motion_steps; ++step) {
/* The center step for motion vertices is not stored in the attribute. */
const float3 *keys = hair->get_curve_keys().data();
size_t center_step = (num_motion_steps - 1) / 2;
if (step != center_step) {
size_t attr_offset = (step > center_step) ? step - 1 : step;
/* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
keys = motion_keys->data_float3() + attr_offset * numKeys;
}
step_offsets.push_back(cpData.size());
for (int c = 0; c < numCurves; ++c) {
const Hair::Curve curve = hair->get_curve(c);
int segCount = curve.num_segments();
int firstKey = curve.first_key;
uint64_t idxBase = cpData.size();
/* The first key is emitted here and again by the s == 0 iteration below; the last key
 * is appended twice after the loop. Radii always come from the non-motion array. */
cpData.push_back(keys[firstKey]);
radiusData.push_back(radiuses[firstKey]);
for (int s = 0; s < segCount; ++s) {
/* Segment indices are recorded once, while processing the first motion step. */
if (step == 0) {
idxData.push_back(idxBase + s);
}
cpData.push_back(keys[firstKey + s]);
radiusData.push_back(radiuses[firstKey + s]);
}
cpData.push_back(keys[firstKey + curve.num_keys - 1]);
cpData.push_back(keys[firstKey + curve.num_keys - 1]);
radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
}
}
/* Allocate and populate MTLBuffers for geometry. */
idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
length:idxData.size() * sizeof(int)
options:MTLResourceStorageModeShared];
cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
length:cpData.size() * sizeof(float3)
options:MTLResourceStorageModeShared];
radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
length:radiusData.size() * sizeof(float)
options:MTLResourceStorageModeShared];
/* One keyframe entry per motion step for both control points and radii. */
std::vector<MTLMotionKeyframeData *> cp_ptrs;
std::vector<MTLMotionKeyframeData *> radius_ptrs;
cp_ptrs.reserve(num_motion_steps);
radius_ptrs.reserve(num_motion_steps);
for (size_t step = 0; step < num_motion_steps; ++step) {
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
k.buffer = cpBuffer;
k.offset = step_offsets[step] * sizeof(float3);
cp_ptrs.push_back(k);
k = [MTLMotionKeyframeData data];
k.buffer = radiusBuffer;
k.offset = step_offsets[step] * sizeof(float);
radius_ptrs.push_back(k);
}
geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
count:cp_ptrs.size()];
geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
count:radius_ptrs.size()];
geomDescCrv.controlPointCount = cpData.size();
geomDescCrv.controlPointStride = sizeof(float3);
geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
geomDescCrv.radiusStride = sizeof(float);
geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
geomDescCrv.segmentCount = idxData.size();
geomDescCrv.segmentControlPointCount = 4;
geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
MTLCurveTypeRound;
geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
geomDescCrv.indexType = MTLIndexTypeUInt32;
geomDescCrv.indexBuffer = idxBuffer;
geomDescCrv.intersectionFunctionTableOffset = 1;
/* Force a single any-hit call, so shadow record-all behavior works correctly */
/* (Match optix behavior: unsigned int build_flags =
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
geomDescCrv.opaque = true;
geomDesc = geomDescCrv;
}
else {
MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
[MTLAccelerationStructureCurveGeometryDescriptor descriptor];
uint64_t numKeys = hair->num_keys();
uint64_t numCurves = hair->num_curves();
const array<float> &radiuses = hair->get_curve_radius();
/* Gather the curve geometry. */
std::vector<float3> cpData;
std::vector<int> idxData;
std::vector<float> radiusData;
cpData.reserve(numKeys);
radiusData.reserve(numKeys);
/* Bind by const reference, like the other getters in this file, so the whole key array is
 * not copied just to be read. */
const array<float3> &keys = hair->get_curve_keys();
for (int c = 0; c < numCurves; ++c) {
const Hair::Curve curve = hair->get_curve(c);
int segCount = curve.num_segments();
int firstKey = curve.first_key;
/* Same layout as the motion path: the first key appears twice (here and at s == 0) and
 * the last key is appended twice after the loop. */
radiusData.push_back(radiuses[firstKey]);
uint64_t idxBase = cpData.size();
cpData.push_back(keys[firstKey]);
for (int s = 0; s < segCount; ++s) {
idxData.push_back(idxBase + s);
cpData.push_back(keys[firstKey + s]);
radiusData.push_back(radiuses[firstKey + s]);
}
cpData.push_back(keys[firstKey + curve.num_keys - 1]);
cpData.push_back(keys[firstKey + curve.num_keys - 1]);
radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
}
/* Allocate and populate MTLBuffers for geometry. */
idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
length:idxData.size() * sizeof(int)
options:MTLResourceStorageModeShared];
cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
length:cpData.size() * sizeof(float3)
options:MTLResourceStorageModeShared];
radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
length:radiusData.size() * sizeof(float)
options:MTLResourceStorageModeShared];
/* NOTE(review): unlike the motion path, radiusStride / radiusFormat are not set here, so
 * the descriptor's defaults apply -- confirm they match a tightly packed float array. */
geomDescCrv.controlPointBuffer = cpBuffer;
geomDescCrv.radiusBuffer = radiusBuffer;
geomDescCrv.controlPointCount = cpData.size();
geomDescCrv.controlPointStride = sizeof(float3);
geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
geomDescCrv.controlPointBufferOffset = 0;
geomDescCrv.segmentCount = idxData.size();
geomDescCrv.segmentControlPointCount = 4;
geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
MTLCurveTypeRound;
geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
geomDescCrv.indexType = MTLIndexTypeUInt32;
geomDescCrv.indexBuffer = idxBuffer;
geomDescCrv.intersectionFunctionTableOffset = 1;
/* Force a single any-hit call, so shadow record-all behavior works correctly */
/* (Match optix behavior: unsigned int build_flags =
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
geomDescCrv.opaque = true;
geomDesc = geomDescCrv;
}
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
accelDesc.geometryDescriptors = @[ geomDesc ];
if (num_motion_steps > 1) {
accelDesc.motionStartTime = 0.0f;
accelDesc.motionEndTime = 1.0f;
accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
accelDesc.motionKeyframeCount = num_motion_steps;
BVH_status("Building motion hair BLAS | %7d curves | %s | %7d motion keyframes",
(int)hair->num_curves(),
geom->name.c_str(),
(int)num_motion_steps);
}
else {
BVH_status(
"Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
}
if (!use_fast_trace_bvh) {
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
MTLAccelerationStructureUsagePreferFastBuild);
}
accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
/* Query the required sizes, then allocate the destination structure, the scratch space and
 * an 8-byte buffer that receives the compacted size. */
MTLAccelerationStructureSizes accelSizes = [mtl_device
accelerationStructureSizesWithDescriptor:accelDesc];
id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
options:MTLResourceStorageModePrivate];
id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
options:MTLResourceStorageModeShared];
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
if (refit) {
/* Refit from the currently held structure into the newly allocated one. */
[accelEnc refitAccelerationStructure:accel_struct
descriptor:accelDesc
destination:accel_uncompressed
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
else {
[accelEnc buildAccelerationStructure:accel_uncompressed
descriptor:accelDesc
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
if (use_fast_trace_bvh) {
/* Ask the GPU for the compacted size; it is read back in the completion handler. */
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
toBuffer:sizeBuf
offset:0
sizeDataType:MTLDataTypeULong];
}
[accelEnc endEncoding];
/* Estimated size of resources that will be wired for the GPU accelerated build.
* Acceleration-struct size is doubled to account for possible compaction step. */
size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
idxBuffer.allocatedSize + scratchBuf.allocatedSize +
accel_uncompressed.allocatedSize * 2;
/* The handler below is registered before the command buffer is committed further down. */
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
/* free temp resources */
[scratchBuf release];
[cpBuffer release];
[radiusBuffer release];
[idxBuffer release];
if (use_fast_trace_bvh) {
/* The size is copied out by value, so the nested block does not touch sizeBuf. */
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
/* The compaction pass is encoded from a global dispatch queue rather than from inside
 * this completion handler. */
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
id<MTLAccelerationStructure> accel = [mtl_device
newAccelerationStructureWithSize:compressed_size];
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
/* Install the compacted structure and drop the uncompacted one. */
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
}];
[accelCommands commit];
});
}
else {
/* set our acceleration structure to the uncompressed structure */
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
}
[sizeBuf release];
}];
/* Wait until it's safe to proceed with GPU acceleration struct build. */
g_bvh_build_throttler.acquire(wired_size);
[accelCommands commit];
return true;
}
# else /* MAC_OS_VERSION_14_0 */
/* Compiled without MAC_OS_VERSION_14_0: the parameters are otherwise unused. */
(void)progress;
(void)mtl_device;
(void)queue;
(void)geom;
(void)(refit);
# endif /* MAC_OS_VERSION_14_0 */
return false;
}
/* Encode and submit a GPU build (or refit, when `refit` is true) of the bottom-level
 * acceleration structure for a point cloud, using one bounding box per point and motion step.
 *
 * Returns false without submitting anything when the point cloud is empty or when the
 * macOS 12.0 availability check fails. Otherwise the command buffer is committed and true is
 * returned; the resulting structure is installed through set_accel_struct() from a command
 * buffer completion handler, i.e. asynchronously with respect to this call. */
bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
id<MTLDevice> mtl_device,
id<MTLCommandQueue> queue,
Geometry *const geom,
bool refit)
{
if (@available(macos 12.0, *)) {
/* Build BLAS for point cloud */
PointCloud *pointcloud = static_cast<PointCloud *>(geom);
if (pointcloud->num_points() == 0) {
return false;
}
const size_t num_points = pointcloud->get_points().size();
const float3 *points = pointcloud->get_points().data();
const float *radius = pointcloud->get_radius().data();
/* Static BVHs, and systems where BLAS refitting is unsupported, take the compaction path
 * below instead of requesting a refittable, fast-to-build structure. */
const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();
/* A single step unless motion blur is enabled and the motion attribute exists. */
size_t num_motion_steps = 1;
Attribute *motion_keys = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
num_motion_steps = pointcloud->get_motion_steps();
}
/* One bounding box per point per motion step, stored step after step. */
const size_t num_aabbs = num_motion_steps * num_points;
/* Allocate a GPU buffer for the AABB data and populate it */
id<MTLBuffer> aabbBuf = [mtl_device
newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
options:MTLResourceStorageModeShared];
MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
/* Get AABBs for each motion step */
size_t center_step = (num_motion_steps - 1) / 2;
for (size_t step = 0; step < num_motion_steps; ++step) {
if (step == center_step) {
/* The center step for motion vertices is not stored in the attribute */
for (size_t j = 0; j < num_points; ++j) {
const PointCloud::Point point = pointcloud->get_point(j);
BoundBox bounds = BoundBox::empty;
point.bounds_grow(points, radius, bounds);
const size_t index = step * num_points + j;
aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
}
}
else {
/* Attribute steps skip the center step, so later steps are shifted down by one. */
size_t attr_offset = (step > center_step) ? step - 1 : step;
float4 *motion_points = motion_keys->data_float4() + attr_offset * num_points;
for (size_t j = 0; j < num_points; ++j) {
const PointCloud::Point point = pointcloud->get_point(j);
BoundBox bounds = BoundBox::empty;
point.bounds_grow(motion_points[j], bounds);
const size_t index = step * num_points + j;
aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
}
}
}
MTLAccelerationStructureGeometryDescriptor *geomDesc;
if (num_motion_steps > 1) {
/* One keyframe entry per motion step, each pointing at its slice of aabbBuf. */
std::vector<MTLMotionKeyframeData *> aabb_ptrs;
aabb_ptrs.reserve(num_motion_steps);
for (size_t step = 0; step < num_motion_steps; ++step) {
MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
k.buffer = aabbBuf;
k.offset = step * num_points * sizeof(MTLAxisAlignedBoundingBox);
aabb_ptrs.push_back(k);
}
MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
[MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
count:aabb_ptrs.size()];
/* Count is per keyframe here, hence num_points rather than num_aabbs. */
geomDescMotion.boundingBoxCount = num_points;
geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
geomDescMotion.intersectionFunctionTableOffset = 2;
/* Force a single any-hit call, so shadow record-all behavior works correctly */
/* (Match optix behavior: unsigned int build_flags =
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
geomDescMotion.opaque = true;
geomDesc = geomDescMotion;
}
else {
MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
[MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
geomDescNoMotion.boundingBoxBuffer = aabbBuf;
geomDescNoMotion.boundingBoxBufferOffset = 0;
/* num_aabbs equals num_points when num_motion_steps is 1. */
geomDescNoMotion.boundingBoxCount = int(num_aabbs);
geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
geomDescNoMotion.intersectionFunctionTableOffset = 2;
/* Force a single any-hit call, so shadow record-all behavior works correctly */
/* (Match optix behavior: unsigned int build_flags =
* OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
geomDescNoMotion.opaque = true;
geomDesc = geomDescNoMotion;
}
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
accelDesc.geometryDescriptors = @[ geomDesc ];
if (num_motion_steps > 1) {
accelDesc.motionStartTime = 0.0f;
accelDesc.motionEndTime = 1.0f;
/* NOTE(review): the border modes are left at the descriptor's defaults here, unlike the
 * mesh and hair builders which set them explicitly -- confirm this is intended. */
// accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
// accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
accelDesc.motionKeyframeCount = num_motion_steps;
BVH_status("Building motion pointcloud BLAS | %7d points | %s | %7d motion keyframes",
(int)pointcloud->num_points(),
geom->name.c_str(),
(int)num_motion_steps);
}
else {
BVH_status("Building pointcloud BLAS | %7d points | %s",
(int)pointcloud->num_points(),
geom->name.c_str());
}
accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
if (!use_fast_trace_bvh) {
accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
MTLAccelerationStructureUsagePreferFastBuild);
}
/* Query the required sizes, then allocate the destination structure, the scratch space and
 * an 8-byte buffer that receives the compacted size. */
MTLAccelerationStructureSizes accelSizes = [mtl_device
accelerationStructureSizesWithDescriptor:accelDesc];
id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
options:MTLResourceStorageModePrivate];
id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
options:MTLResourceStorageModeShared];
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
if (refit) {
/* Refit from the currently held structure into the newly allocated one. */
[accelEnc refitAccelerationStructure:accel_struct
descriptor:accelDesc
destination:accel_uncompressed
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
else {
[accelEnc buildAccelerationStructure:accel_uncompressed
descriptor:accelDesc
scratchBuffer:scratchBuf
scratchBufferOffset:0];
}
if (use_fast_trace_bvh) {
/* Ask the GPU for the compacted size; it is read back in the completion handler. */
[accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
toBuffer:sizeBuf
offset:0
sizeDataType:MTLDataTypeULong];
}
[accelEnc endEncoding];
/* Estimated size of resources that will be wired for the GPU accelerated build.
* Acceleration-struct size is doubled to account for possible compaction step. */
size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
accel_uncompressed.allocatedSize * 2;
/* The handler below is registered before the command buffer is committed further down. */
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
/* free temp resources */
[scratchBuf release];
[aabbBuf release];
if (use_fast_trace_bvh) {
/* Compact the accel structure */
/* The size is copied out by value, so the nested block does not touch sizeBuf. */
uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
/* The compaction pass is encoded from a global dispatch queue rather than from inside
 * this completion handler. */
dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
id<MTLAccelerationStructure> accel = [mtl_device
newAccelerationStructureWithSize:compressed_size];
[accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
/* Install the compacted structure and drop the uncompacted one. */
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
}];
[accelCommands commit];
});
}
else {
/* set our acceleration structure to the uncompressed structure */
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
}
[sizeBuf release];
}];
/* Wait until it's safe to proceed with GPU acceleration struct build. */
g_bvh_build_throttler.acquire(wired_size);
[accelCommands commit];
return true;
}
return false;
}
/* Build (or refit) the bottom-level acceleration structure for the single geometry held by
 * this BVH, dispatching on its geometry type.
 *
 * Meshes and volumes share the triangle builder; hair and point clouds have their own.
 * Returns whatever the selected builder returns, or false for any other geometry type. */
bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  assert(objects.size() == 1 && geometry.size() == 1);

  /* Build bottom level acceleration structures (BLAS) */
  Geometry *const geom = geometry[0];
  const auto kind = geom->geometry_type;

  if (kind == Geometry::VOLUME || kind == Geometry::MESH) {
    return build_BLAS_mesh(progress, mtl_device, queue, geom, refit);
  }
  if (kind == Geometry::HAIR) {
    return build_BLAS_hair(progress, mtl_device, queue, geom, refit);
  }
  if (kind == Geometry::POINTCLOUD) {
    return build_BLAS_pointcloud(progress, mtl_device, queue, geom, refit);
  }
  return false;
}
# if defined(MAC_OS_VERSION_15_0)
/* Return MTLComponentTransform from a DecomposedTransform. */
/* Convert a DecomposedTransform into a MTLComponentTransform.
 *
 * Field mapping used: rotation <- src.x (all four components), translation <- src.y.xyz,
 * scale <- (src.y.w, src.z.w, src.w.w), shear <- (src.z.x, src.z.y, src.w.x). The pivot is
 * always zero. */
static MTLComponentTransform decomposed_to_component_transform(const DecomposedTransform &src)
{
  MTLComponentTransform result;

  result.rotation = MTLPackedFloatQuaternionMake(src.x.x, src.x.y, src.x.z, src.x.w);
  result.translation = MTLPackedFloat3Make(src.y.x, src.y.y, src.y.z);
  result.scale = MTLPackedFloat3Make(src.y.w, src.z.w, src.w.w);
  result.shear = MTLPackedFloat3Make(src.z.x, src.z.y, src.w.x);
  result.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);

  return result;
}
/* Return unit MTLComponentTransform. */
/* Build the MTLComponentTransform used as the unit transform: scale of one on every axis,
 * zero shear, pivot and translation, and the quaternion (0, 0, 0, 1) as rotation. */
static MTLComponentTransform component_transform_make_unit()
{
  const MTLPackedFloat3 zero = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);

  MTLComponentTransform unit;
  unit.scale = MTLPackedFloat3Make(1.0f, 1.0f, 1.0f);
  unit.shear = zero;
  unit.pivot = zero;
  unit.rotation = MTLPackedFloatQuaternionMake(0.0f, 0.0f, 0.0f, 1.0f);
  unit.translation = zero;
  return unit;
}
# endif
/* Build (or refit) the top level acceleration structure (TLAS) over all objects.
 *
 * Steps:
 * - Wait for all outstanding BLAS builds tracked by the build throttler.
 * - Emit one instance descriptor per object (motion or user-ID flavor), so the instance index
 *   matches the object's position in `objects`. Objects without a usable BLAS, or which are
 *   not traceable, reference a shared degenerate "null" BLAS.
 * - Encode a build or refit on `queue` and block until the command buffer has completed.
 * - Cache the resulting acceleration structure plus the per-instance (`blas_array`) and
 *   de-duplicated (`unique_blas_array`) BLAS lists.
 *
 * Returns false when there are no objects, or when the macOS 12.0 availability check fails. */
bool BVHMetal::build_TLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* Wait for all BLAS builds to finish. */
  g_bvh_build_throttler.wait_for_all();

  if (@available(macos 12.0, *)) {
    /* Defined inside available check, for return type to be available. */
    auto make_null_BLAS = [](id<MTLDevice> mtl_device,
                             id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
      /* A single tiny buffer doubles as vertex and index buffer; with a triangle count of zero
       * its contents are never referenced by any primitive. */
      id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3)
                                                      options:MTLResourceStorageModeShared];

      /* Create an acceleration structure. */
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDesc.vertexBuffer = nullBuf;
      geomDesc.vertexBufferOffset = 0;
      geomDesc.vertexStride = sizeof(float3);
      geomDesc.indexBuffer = nullBuf;
      geomDesc.indexBufferOffset = 0;
      geomDesc.indexType = MTLIndexTypeUInt32;
      geomDesc.triangleCount = 0;
      geomDesc.intersectionFunctionTableOffset = 0;
      geomDesc.opaque = true;
      geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

      MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
          [MTLPrimitiveAccelerationStructureDescriptor descriptor];
      accelDesc.geometryDescriptors = @[ geomDesc ];
      accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

      MTLAccelerationStructureSizes accelSizes = [mtl_device
          accelerationStructureSizesWithDescriptor:accelDesc];
      id<MTLAccelerationStructure> accel_struct = [mtl_device
          newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
      id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                         options:MTLResourceStorageModePrivate];

      id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
      id<MTLAccelerationStructureCommandEncoder> accelEnc =
          [accelCommands accelerationStructureCommandEncoder];
      [accelEnc buildAccelerationStructure:accel_struct
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
      [accelEnc endEncoding];
      [accelCommands commit];
      [accelCommands waitUntilCompleted];

      /* free temp resources */
      [scratchBuf release];
      [nullBuf release];

      return accel_struct;
    };

    /* Count instances and the number of motion transform slots they will occupy. Every object
     * consumes at least one slot, so the counts stay valid if motion descriptors are used. */
    uint32_t num_instances = 0;
    uint32_t num_motion_transforms = 0;
    uint32_t num_motion_instances = 0;
    for (Object *ob : objects) {
      num_instances++;
      if (ob->use_motion()) {
        num_motion_transforms += max((size_t)1, ob->get_motion().size());
        num_motion_instances++;
      }
      else {
        num_motion_transforms++;
      }
    }

    if (num_instances == 0) {
      return false;
    }

    const bool use_instance_motion = motion_blur && num_motion_instances;
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    NSMutableArray *all_blas = [NSMutableArray array];
    unordered_map<const BVHMetal *, int> instance_mapping;

    /* Lambda function to build/retrieve the BLAS index mapping. A null `blas` maps to the
     * shared degenerate BLAS. */
    auto get_blas_index = [&](const BVHMetal *blas) {
      auto it = instance_mapping.find(blas);
      if (it != instance_mapping.end()) {
        return it->second;
      }
      int blas_index = (int)[all_blas count];
      instance_mapping[blas] = blas_index;
      if (@available(macos 12.0, *)) {
        [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
      }
      return blas_index;
    };

    size_t instance_size;
    if (use_instance_motion) {
      instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
    }
    else {
      instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
    }

    /* Allocate a GPU buffer for the instance data and populate it */
    id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
                                                        options:MTLResourceStorageModeShared];
    id<MTLBuffer> motion_transforms_buf = nil;
    MTLPackedFloat4x3 *matrix_motion_transforms = nullptr;
# if defined(MAC_OS_VERSION_15_0)
    MTLComponentTransform *decomposed_motion_transforms = nullptr;
# endif
    if (use_instance_motion && num_motion_transforms) {
# if defined(MAC_OS_VERSION_15_0)
      if (use_pcmi) {
        if (@available(macos 15.0, *)) {
          motion_transforms_buf = [mtl_device
              newBufferWithLength:num_motion_transforms * sizeof(MTLComponentTransform)
                          options:MTLResourceStorageModeShared];
          decomposed_motion_transforms = (MTLComponentTransform *)motion_transforms_buf.contents;
        }
      }
      else
# endif
      {
        motion_transforms_buf = [mtl_device
            newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                        options:MTLResourceStorageModeShared];
        matrix_motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
      }
    }

    uint32_t instance_index = 0;
    uint32_t motion_transform_index = 0;

    blas_array.clear();
    blas_array.reserve(num_instances);

    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      const Geometry *geom = ob->get_geometry();
      const BVHMetal *blas = static_cast<const BVHMetal *>(geom->bvh.get());
      if (!blas || !blas->accel_struct || !ob->is_traceable()) {
        /* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
         * in our intersection functions */
        blas = nullptr;

        /* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
         * the descriptor. */
        if (!null_BLAS) {
          null_BLAS = make_null_BLAS(mtl_device, queue);
        }
        blas_array.push_back(null_BLAS);
      }
      else {
        blas_array.push_back(blas->accel_struct);
      }

      uint32_t accel_struct_index = get_blas_index(blas);

      /* Add some of the object visibility bits to the mask.
       * __prim_visibility contains the combined visibility bits of all instances, so is not
       * reliable if they differ between instances.
       */
      uint32_t mask = ob->visibility_for_tracing();

      /* Have to have at least one bit in the mask, or else instance would always be culled. */
      if (0 == mask) {
        mask = 0xFF;
      }

      /* The user instance ID carries the primitive offset of the instanced geometry. */
      uint32_t primitive_offset = 0;
      int currIndex = instance_index++;

      if (geom->is_hair()) {
        /* Curve primitives: offset of the first curve segment. */
        Hair *const hair = static_cast<Hair *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(hair->curve_segment_offset);
      }
      else if (geom->is_mesh() || geom->is_volume()) {
        /* Triangle primitives. */
        Mesh *const mesh = static_cast<Mesh *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(mesh->prim_offset);
      }
      else if (geom->is_pointcloud()) {
        /* Point primitives. */
        PointCloud *const pointcloud = static_cast<PointCloud *const>(
            const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(pointcloud->prim_offset);
      }

      /* Bake into the appropriate descriptor */
      if (use_instance_motion) {
        MTLAccelerationStructureMotionInstanceDescriptor *instances =
            (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.motionStartTime = 0.0f;
        desc.motionEndTime = 1.0f;
        desc.motionTransformsStartIndex = motion_transform_index;
        desc.motionStartBorderMode = MTLMotionBorderModeVanish;
        desc.motionEndBorderMode = MTLMotionBorderModeVanish;
        desc.intersectionFunctionTableOffset = 0;

        /* One decomposed transform per motion key; empty when the object has no keys. */
        array<DecomposedTransform> decomp(ob->get_motion().size());
        transform_motion_decompose(
            decomp.data(), ob->get_motion().data(), ob->get_motion().size());

        int key_count = ob->get_motion().size();
        if (key_count) {
          desc.motionTransformsCount = key_count;

# if defined(MAC_OS_VERSION_15_0)
          if (use_pcmi) {
            for (int i = 0; i < key_count; i++) {
              decomposed_motion_transforms[motion_transform_index++] =
                  decomposed_to_component_transform(decomp[i]);
            }
          }
          else
# endif
          {
            Transform *keys = ob->get_motion().data();
            for (int i = 0; i < key_count; i++) {
              float *t = (float *)&matrix_motion_transforms[motion_transform_index++];
              /* Transpose transform */
              const auto *src = (const float *)&keys[i];
              for (int j = 0; j < 12; j++) {
                t[j] = src[(j / 3) + 4 * (j % 3)];
              }
            }
          }
        }
        else {
          desc.motionTransformsCount = 1;

# if defined(MAC_OS_VERSION_15_0)
          if (use_pcmi) {
            if (ob->get_geometry()->is_instanced()) {
              /* There are no motion keys here, so `decomp` is empty and must not be indexed.
               * Decompose the static object transform instead, matching the matrix path
               * below which also uses `ob->get_tfm()`. */
              Transform static_tfm = ob->get_tfm();
              DecomposedTransform static_decomp;
              transform_motion_decompose(&static_decomp, &static_tfm, 1);
              decomposed_motion_transforms[motion_transform_index++] =
                  decomposed_to_component_transform(static_decomp);
            }
            else {
              decomposed_motion_transforms[motion_transform_index++] =
                  component_transform_make_unit();
            }
          }
          else
# endif
          {
            float *t = (float *)&matrix_motion_transforms[motion_transform_index++];
            if (ob->get_geometry()->is_instanced()) {
              /* Transpose transform */
              const auto *src = (const float *)&ob->get_tfm();
              for (int i = 0; i < 12; i++) {
                t[i] = src[(i / 3) + 4 * (i % 3)];
              }
            }
            else {
              /* Clear transform to identity matrix. Only the diagonal is written; the other
               * entries keep whatever the freshly allocated buffer holds (expected to be
               * zero). */
              t[0] = t[4] = t[8] = 1.0f;
            }
          }
        }
      }
      else {
        MTLAccelerationStructureUserIDInstanceDescriptor *instances =
            (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.intersectionFunctionTableOffset = 0;
        desc.options = MTLAccelerationStructureInstanceOptionOpaque;

        float *t = (float *)&desc.transformationMatrix;
        if (ob->get_geometry()->is_instanced()) {
          /* Transpose transform */
          const auto *src = (const float *)&ob->get_tfm();
          for (int i = 0; i < 12; i++) {
            t[i] = src[(i / 3) + 4 * (i % 3)];
          }
        }
        else {
          /* Clear transform to identity matrix. Only the diagonal is written; the other
           * entries keep whatever the freshly allocated buffer holds (expected to be zero). */
          t[0] = t[4] = t[8] = 1.0f;
        }
      }
    }

    if (use_instance_motion) {
      BVH_status(
          "Building motion TLAS | %7d instances | %7d motion instances | %7d motion "
          "transforms",
          (int)num_instances,
          (int)num_motion_instances,
          (int)num_motion_transforms);
    }
    else {
      BVH_status("Building TLAS | %7d instances", (int)num_instances);
    }

    MTLInstanceAccelerationStructureDescriptor *accelDesc =
        [MTLInstanceAccelerationStructureDescriptor descriptor];
    accelDesc.instanceCount = num_instances;
    accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
    accelDesc.instanceDescriptorBuffer = instanceBuf;
    accelDesc.instanceDescriptorBufferOffset = 0;
    accelDesc.instanceDescriptorStride = instance_size;
    accelDesc.instancedAccelerationStructures = all_blas;

    if (use_instance_motion) {
      accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
      accelDesc.motionTransformBuffer = motion_transforms_buf;
      accelDesc.motionTransformCount = num_motion_transforms;
# if defined(MAC_OS_VERSION_15_0)
      if (@available(macos 15.0, *)) {
        accelDesc.motionTransformStride = 0;
        accelDesc.motionTransformType = use_pcmi ? MTLTransformTypeComponent :
                                                   MTLTransformTypePackedFloat4x3;
      }
# endif
    }

    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      /* Refit from the currently cached structure into the newly allocated one. */
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    [accelEnc endEncoding];
    [accelCommands commit];
    [accelCommands waitUntilCompleted];

    /* The GPU work has completed, so the temporary buffers can be released. */
    if (motion_transforms_buf) {
      [motion_transforms_buf release];
    }
    [instanceBuf release];
    [scratchBuf release];

    /* Cache top and bottom-level acceleration structs */
    set_accel_struct(accel);

    unique_blas_array.clear();
    unique_blas_array.reserve(all_blas.count);
    [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger, BOOL *) {
      unique_blas_array.push_back(blas);
    }];

    return true;
  }
  return false;
}
/* Build or refit this BVH on the given Metal device and command queue.
 *
 * Validates the refit request first, then builds a TLAS when `params.top_level` is set and a
 * BLAS otherwise, inside a local autorelease pool. Returns the result of the chosen builder. */
bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> mtl_device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    /* It isn't valid to refit a non-existent BVH, or one which wasn't constructed as dynamic.
     * In such cases, assert in development but try to recover in the wild by falling back to
     * a full build. */
    if (refit && params.bvh_type != BVH_TYPE_DYNAMIC) {
      assert(!"Can't refit static Metal BVH");
      refit = false;
    }
    else if (refit && !accel_struct) {
      assert(!"Can't refit non-existing Metal BVH");
      refit = false;
    }

    /* A full build starts from a cleared acceleration structure. */
    if (!refit) {
      set_accel_struct(nil);
    }
  }

  /* Without BLAS refit support, always do a full build. */
  if (!support_refit_blas()) {
    refit = false;
  }

  @autoreleasepool {
    return params.top_level ? build_TLAS(progress, mtl_device, queue, refit) :
                              build_BLAS(progress, mtl_device, queue, refit);
  }
}
CCL_NAMESPACE_END
#endif /* WITH_METAL */