/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#ifdef WITH_METAL

#  include <algorithm>
#  include <chrono>
#  include <thread>
#  include <vector>

#  include "scene/hair.h"
#  include "scene/mesh.h"
#  include "scene/object.h"
#  include "scene/pointcloud.h"

#  include "util/progress.h"

#  include "device/metal/bvh.h"
#  include "device/metal/util.h"

CCL_NAMESPACE_BEGIN

#  define BVH_status(...) \
    { \
      string str = string_printf(__VA_ARGS__); \
      progress.set_substatus(str); \
      metal_printf("%s\n", str.c_str()); \
    }

// #  define BVH_THROTTLE_DIAGNOSTICS
#  ifdef BVH_THROTTLE_DIAGNOSTICS
#    define bvh_throttle_printf(...) printf("BVHMetalBuildThrottler::" __VA_ARGS__)
#  else
#    define bvh_throttle_printf(...)
#  endif

/* Limit the number of concurrent BVH builds so that we don't approach unsafe GPU working set
 * sizes. */
struct BVHMetalBuildThrottler {
  thread_mutex mutex;
  size_t wired_memory = 0;
  size_t safe_wired_limit = 0;
  int requests_in_flight = 0;

  BVHMetalBuildThrottler()
  {
    /* The default device will always be the one that supports MetalRT if the machine
     * supports it. */
    id<MTLDevice> mtlDevice = MTLCreateSystemDefaultDevice();

    /* Set a conservative limit, but which will still only throttle in extreme cases. */
    safe_wired_limit = [mtlDevice recommendedMaxWorkingSetSize] / 4;
    bvh_throttle_printf("safe_wired_limit = %zu\n", safe_wired_limit);
  }

  /* Block until we're safely able to wire the requested resources. */
  void acquire(const size_t bytes_to_be_wired)
  {
    bool throttled = false;
    while (true) {
      {
        thread_scoped_lock lock(mutex);

        /* Always allow a BVH build to proceed if no other is in flight, otherwise
         * only proceed if we're within safe limits. */
        if (wired_memory == 0 || wired_memory + bytes_to_be_wired <= safe_wired_limit) {
          wired_memory += bytes_to_be_wired;
          requests_in_flight += 1;
          bvh_throttle_printf(
              "acquire -- success (requests_in_flight = %d, wired_memory = %zu)\n",
              requests_in_flight,
              wired_memory);
          return;
        }

        if (!throttled) {
          bvh_throttle_printf(
              "acquire -- throttling (requests_in_flight = %d, wired_memory = %zu, "
              "bytes_to_be_wired = %zu)\n",
              requests_in_flight,
              wired_memory,
              bytes_to_be_wired);
        }
        throttled = true;
      }

      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }

  /* Notify of resources that have stopped being wired. */
  void release(const size_t bytes_just_unwired)
  {
    thread_scoped_lock lock(mutex);
    wired_memory -= bytes_just_unwired;
    requests_in_flight -= 1;
    bvh_throttle_printf("release (requests_in_flight = %d, wired_memory = %zu)\n",
                        requests_in_flight,
                        wired_memory);
  }

  /* Wait for all outstanding work to finish. */
  void wait_for_all()
  {
    while (true) {
      {
        thread_scoped_lock lock(mutex);
        if (wired_memory == 0) {
          return;
        }
      }
      std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  }
} g_bvh_build_throttler;
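
/* Usage pattern (see the build_BLAS_* functions below): estimate the wired
 * footprint of a build, then bracket the GPU work with the throttler:
 *
 *   g_bvh_build_throttler.acquire(wired_size);  // may block
 *   [accelCommands commit];
 *   ... completion handler ... g_bvh_build_throttler.release(wired_size);
 *
 * Since acquire() always admits a build when nothing else is in flight, a
 * single build larger than safe_wired_limit still makes progress instead of
 * deadlocking. */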

/* macOS 15.2 and 15.3 have a bug in dynamic BVH refitting which leads to missing geometry
 * during render. The issue is fixed in macOS 15.4; until then, disable refitting even for
 * the viewport.
 * Note that a dynamic BVH is still used at the scene level to speed up updates of instances
 * and such. See #132782. */
static bool support_refit_blas()
{
  if (@available(macos 15.4, *)) {
    return true;
  }
  if (@available(macos 15.2, *)) {
    return false;
  }
  return true;
}
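
/* Note the ordering above: @available(macos 15.2, *) is a minimum-version
 * check, so it also matches 15.4 and newer. The 15.4 check must therefore
 * come first so that refitting is re-enabled on fixed systems. */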

BVHMetal::BVHMetal(const BVHParams &params_,
                   const vector<Geometry *> &geometry_,
                   const vector<Object *> &objects_,
                   Device *device)
    : BVH(params_, geometry_, objects_), device(device)
{
}

BVHMetal::~BVHMetal()
{
  if (@available(macos 12.0, *)) {
    set_accel_struct(nil);
    if (null_BLAS) {
      [null_BLAS release];
    }
  }
}

API_AVAILABLE(macos(11.0))
void BVHMetal::set_accel_struct(id<MTLAccelerationStructure> new_accel_struct)
{
  if (@available(macos 12.0, *)) {
    if (accel_struct) {
      device->stats.mem_free(accel_struct.allocatedSize);
      [accel_struct release];
      accel_struct = nil;
    }

    if (new_accel_struct) {
      accel_struct = new_accel_struct;
      device->stats.mem_alloc(accel_struct.allocatedSize);
    }
  }
}

bool BVHMetal::build_BLAS_mesh(Progress &progress,
                               id<MTLDevice> mtl_device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
  if (@available(macos 12.0, *)) {
    /* Build BLAS for triangle primitives */
    Mesh *const mesh = static_cast<Mesh *const>(geom);
    if (mesh->num_triangles() == 0) {
      return false;
    }

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    const array<float3> &verts = mesh->get_verts();
    const array<int> &tris = mesh->get_triangles();
    const size_t num_verts = verts.size();
    const size_t num_indices = tris.size();

    size_t num_motion_steps = 1;
    Attribute *motion_keys = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_blur && mesh->get_use_motion_blur() && motion_keys) {
      num_motion_steps = mesh->get_motion_steps();
    }

    /* Upload the mesh data to the GPU */
    id<MTLBuffer> posBuf = nil;
    id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
                                                     length:num_indices * sizeof(tris.data()[0])
                                                    options:MTLResourceStorageModeShared];

    if (num_motion_steps == 1) {
      posBuf = [mtl_device newBufferWithBytes:verts.data()
                                       length:num_verts * sizeof(verts.data()[0])
                                      options:MTLResourceStorageModeShared];
    }
    else {
      posBuf = [mtl_device
          newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
                      options:MTLResourceStorageModeShared];
      float3 *dest_data = (float3 *)[posBuf contents];
      size_t center_step = (num_motion_steps - 1) / 2;
      for (size_t step = 0; step < num_motion_steps; ++step) {
        const float3 *verts = mesh->get_verts().data();

        /* The center step for motion vertices is not stored in the attribute. */
        if (step != center_step) {
          verts = motion_keys->data_float3() + (step > center_step ? step - 1 : step) * num_verts;
        }
        std::copy_n(verts, num_verts, dest_data + num_verts * step);
      }
    }
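
    /* Example layout, assuming num_motion_steps = 3 (so center_step = 1):
     * posBuf = [step 0 | center | step 2], where steps 0 and 2 come from the
     * motion attribute (slots 0 and 1) and the center step comes from
     * mesh->get_verts(). Metal interpolates between these keyframes over the
     * [motionStartTime, motionEndTime] range set on the descriptor below. */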

    /* Create an acceleration structure. */
    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      std::vector<MTLMotionKeyframeData *> vertex_ptrs;
      vertex_ptrs.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = posBuf;
        k.offset = num_verts * step * sizeof(float3);
        vertex_ptrs.push_back(k);
      }

      MTLAccelerationStructureMotionTriangleGeometryDescriptor *geomDescMotion =
          [MTLAccelerationStructureMotionTriangleGeometryDescriptor descriptor];
      geomDescMotion.vertexBuffers = [NSArray arrayWithObjects:vertex_ptrs.data()
                                                         count:vertex_ptrs.size()];
      geomDescMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescMotion.indexBuffer = indexBuf;
      geomDescMotion.indexBufferOffset = 0;
      geomDescMotion.indexType = MTLIndexTypeUInt32;
      geomDescMotion.triangleCount = num_indices / 3;
      geomDescMotion.intersectionFunctionTableOffset = 0;
      geomDescMotion.opaque = true;

      geomDesc = geomDescMotion;

      BVH_status("Building motion mesh BLAS | %7d tris | %s | %7d motion keyframes",
                 (int)mesh->num_triangles(),
                 geom->name.c_str(),
                 (int)num_motion_steps);
    }
    else {
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDescNoMotion.vertexBuffer = posBuf;
      geomDescNoMotion.vertexBufferOffset = 0;
      geomDescNoMotion.vertexStride = sizeof(verts.data()[0]);
      geomDescNoMotion.indexBuffer = indexBuf;
      geomDescNoMotion.indexBufferOffset = 0;
      geomDescNoMotion.indexType = MTLIndexTypeUInt32;
      geomDescNoMotion.triangleCount = num_indices / 3;
      geomDescNoMotion.intersectionFunctionTableOffset = 0;
      geomDescNoMotion.opaque = true;

      geomDesc = geomDescNoMotion;

      BVH_status(
          "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str());
    }

    /* Force a single any-hit call, so shadow record-all behavior works correctly */
    /* (Match optix behavior: unsigned int build_flags =
     * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
    geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];
    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeClamp;
      accelDesc.motionKeyframeCount = num_motion_steps;
    }
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
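
    /* ExtendedLimits raises Metal's default caps on primitive, geometry and
     * instance counts (at some cost in acceleration-structure size); assumed
     * necessary here for large production scenes. */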

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];
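
    /* From here the flow is asynchronous: the completion handler below frees
     * the temporary buffers and, for static (fast-trace) BVHs, dispatches a
     * second command buffer on a background queue to compact the structure
     * into its final, smaller allocation. The throttler is acquired before
     * committing and released once the final structure is in place. */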

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = posBuf.allocatedSize + indexBuf.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [indexBuf release];
      [posBuf release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];

    return true;
  }
  return false;
}

bool BVHMetal::build_BLAS_hair(Progress &progress,
                               id<MTLDevice> mtl_device,
                               id<MTLCommandQueue> queue,
                               Geometry *const geom,
                               bool refit)
{
#  if defined(MAC_OS_VERSION_14_0)
  if (@available(macos 14.0, *)) {
    /* Build BLAS for hair curves */
    Hair *hair = static_cast<Hair *>(geom);
    if (hair->num_curves() == 0) {
      return false;
    }

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    size_t num_motion_steps = 1;
    Attribute *motion_keys = hair->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_blur && hair->get_use_motion_blur() && motion_keys) {
      num_motion_steps = hair->get_motion_steps();
    }

    id<MTLBuffer> cpBuffer = nil;
    id<MTLBuffer> radiusBuffer = nil;
    id<MTLBuffer> idxBuffer = nil;

    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv =
          [MTLAccelerationStructureMotionCurveGeometryDescriptor descriptor];

      uint64_t numKeys = hair->num_keys();
      uint64_t numCurves = hair->num_curves();
      const array<float> &radiuses = hair->get_curve_radius();

      /* Gather the curve geometry. */
      std::vector<float3> cpData;
      std::vector<int> idxData;
      std::vector<float> radiusData;
      cpData.reserve(numKeys);
      radiusData.reserve(numKeys);

      std::vector<int> step_offsets;
      for (size_t step = 0; step < num_motion_steps; ++step) {

        /* The center step for motion vertices is not stored in the attribute. */
        const float3 *keys = hair->get_curve_keys().data();
        size_t center_step = (num_motion_steps - 1) / 2;
        if (step != center_step) {
          size_t attr_offset = (step > center_step) ? step - 1 : step;
          /* Technically this is a float4 array, but sizeof(float3) == sizeof(float4). */
          keys = motion_keys->data_float3() + attr_offset * numKeys;
        }

        step_offsets.push_back(cpData.size());

        for (int c = 0; c < numCurves; ++c) {
          const Hair::Curve curve = hair->get_curve(c);
          int segCount = curve.num_segments();
          int firstKey = curve.first_key;
          uint64_t idxBase = cpData.size();
          cpData.push_back(keys[firstKey]);
          radiusData.push_back(radiuses[firstKey]);
          for (int s = 0; s < segCount; ++s) {
            if (step == 0) {
              idxData.push_back(idxBase + s);
            }
            cpData.push_back(keys[firstKey + s]);
            radiusData.push_back(radiuses[firstKey + s]);
          }
          cpData.push_back(keys[firstKey + curve.num_keys - 1]);
          cpData.push_back(keys[firstKey + curve.num_keys - 1]);
          radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
          radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
        }
      }
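
      /* The gather above pads each curve for Catmull-Rom evaluation, which
       * needs 4 control points per segment. E.g. a curve with keys k0,k1,k2
       * (2 segments) contributes [k0, k0, k1, k2, k2] to cpData: the
       * duplicated end points act as phantom points, and segment s uses
       * control points [idxBase + s, idxBase + s + 3]. */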

      /* Allocate and populate MTLBuffers for geometry. */
      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
                                          length:idxData.size() * sizeof(int)
                                         options:MTLResourceStorageModeShared];

      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
                                         length:cpData.size() * sizeof(float3)
                                        options:MTLResourceStorageModeShared];

      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
                                             length:radiusData.size() * sizeof(float)
                                            options:MTLResourceStorageModeShared];

      std::vector<MTLMotionKeyframeData *> cp_ptrs;
      std::vector<MTLMotionKeyframeData *> radius_ptrs;
      cp_ptrs.reserve(num_motion_steps);
      radius_ptrs.reserve(num_motion_steps);

      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = cpBuffer;
        k.offset = step_offsets[step] * sizeof(float3);
        cp_ptrs.push_back(k);

        k = [MTLMotionKeyframeData data];
        k.buffer = radiusBuffer;
        k.offset = step_offsets[step] * sizeof(float);
        radius_ptrs.push_back(k);
      }

      geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
                                                            count:cp_ptrs.size()];
      geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
                                                      count:radius_ptrs.size()];

      geomDescCrv.controlPointCount = cpData.size();
      geomDescCrv.controlPointStride = sizeof(float3);
      geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
      geomDescCrv.radiusStride = sizeof(float);
      geomDescCrv.radiusFormat = MTLAttributeFormatFloat;
      geomDescCrv.segmentCount = idxData.size();
      geomDescCrv.segmentControlPointCount = 4;
      geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
                                                                    MTLCurveTypeRound;
      geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
      geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
      geomDescCrv.indexType = MTLIndexTypeUInt32;
      geomDescCrv.indexBuffer = idxBuffer;
      geomDescCrv.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescCrv.opaque = true;
      geomDesc = geomDescCrv;
    }
    else {
      MTLAccelerationStructureCurveGeometryDescriptor *geomDescCrv =
          [MTLAccelerationStructureCurveGeometryDescriptor descriptor];

      uint64_t numKeys = hair->num_keys();
      uint64_t numCurves = hair->num_curves();
      const array<float> &radiuses = hair->get_curve_radius();

      /* Gather the curve geometry. */
      std::vector<float3> cpData;
      std::vector<int> idxData;
      std::vector<float> radiusData;
      cpData.reserve(numKeys);
      radiusData.reserve(numKeys);
      auto keys = hair->get_curve_keys();
      for (int c = 0; c < numCurves; ++c) {
        const Hair::Curve curve = hair->get_curve(c);
        int segCount = curve.num_segments();
        int firstKey = curve.first_key;
        radiusData.push_back(radiuses[firstKey]);
        uint64_t idxBase = cpData.size();
        cpData.push_back(keys[firstKey]);
        for (int s = 0; s < segCount; ++s) {
          idxData.push_back(idxBase + s);
          cpData.push_back(keys[firstKey + s]);
          radiusData.push_back(radiuses[firstKey + s]);
        }
        cpData.push_back(keys[firstKey + curve.num_keys - 1]);
        cpData.push_back(keys[firstKey + curve.num_keys - 1]);
        radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
        radiusData.push_back(radiuses[firstKey + curve.num_keys - 1]);
      }

      /* Allocate and populate MTLBuffers for geometry. */
      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
                                          length:idxData.size() * sizeof(int)
                                         options:MTLResourceStorageModeShared];

      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
                                         length:cpData.size() * sizeof(float3)
                                        options:MTLResourceStorageModeShared];

      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
                                             length:radiusData.size() * sizeof(float)
                                            options:MTLResourceStorageModeShared];

      geomDescCrv.controlPointBuffer = cpBuffer;
      geomDescCrv.radiusBuffer = radiusBuffer;
      geomDescCrv.controlPointCount = cpData.size();
      geomDescCrv.controlPointStride = sizeof(float3);
      geomDescCrv.controlPointFormat = MTLAttributeFormatFloat3;
      geomDescCrv.controlPointBufferOffset = 0;
      geomDescCrv.segmentCount = idxData.size();
      geomDescCrv.segmentControlPointCount = 4;
      geomDescCrv.curveType = (hair->curve_shape == CURVE_RIBBON) ? MTLCurveTypeFlat :
                                                                    MTLCurveTypeRound;
      geomDescCrv.curveBasis = MTLCurveBasisCatmullRom;
      geomDescCrv.curveEndCaps = MTLCurveEndCapsDisk;
      geomDescCrv.indexType = MTLIndexTypeUInt32;
      geomDescCrv.indexBuffer = idxBuffer;
      geomDescCrv.intersectionFunctionTableOffset = 1;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescCrv.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescCrv.opaque = true;
      geomDesc = geomDescCrv;
    }

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];

    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionKeyframeCount = num_motion_steps;

      BVH_status("Building motion hair BLAS | %7d curves | %s | %7d motion keyframes",
                 (int)hair->num_curves(),
                 geom->name.c_str(),
                 (int)num_motion_steps);
    }
    else {
      BVH_status(
          "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str());
    }

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = cpBuffer.allocatedSize + radiusBuffer.allocatedSize +
                        idxBuffer.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [cpBuffer release];
      [radiusBuffer release];
      [idxBuffer release];

      if (use_fast_trace_bvh) {
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];

    return true;
  }
#  else /* MAC_OS_VERSION_14_0 */
  (void)progress;
  (void)mtl_device;
  (void)queue;
  (void)geom;
  (void)refit;
#  endif /* MAC_OS_VERSION_14_0 */
  return false;
}

bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
                                     id<MTLDevice> mtl_device,
                                     id<MTLCommandQueue> queue,
                                     Geometry *const geom,
                                     bool refit)
{
  if (@available(macos 12.0, *)) {
    /* Build BLAS for point cloud */
    PointCloud *pointcloud = static_cast<PointCloud *>(geom);
    if (pointcloud->num_points() == 0) {
      return false;
    }

    const size_t num_points = pointcloud->get_points().size();
    const float3 *points = pointcloud->get_points().data();
    const float *radius = pointcloud->get_radius().data();

    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    size_t num_motion_steps = 1;
    Attribute *motion_keys = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
    if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
      num_motion_steps = pointcloud->get_motion_steps();
    }

    const size_t num_aabbs = num_motion_steps * num_points;

    /* Allocate a GPU buffer for the AABB data and populate it */
    id<MTLBuffer> aabbBuf = [mtl_device
        newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
                    options:MTLResourceStorageModeShared];
    MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];

    /* Get AABBs for each motion step */
    size_t center_step = (num_motion_steps - 1) / 2;
    for (size_t step = 0; step < num_motion_steps; ++step) {
      if (step == center_step) {
        /* The center step for motion vertices is not stored in the attribute */
        for (size_t j = 0; j < num_points; ++j) {
          const PointCloud::Point point = pointcloud->get_point(j);
          BoundBox bounds = BoundBox::empty;
          point.bounds_grow(points, radius, bounds);

          const size_t index = step * num_points + j;
          aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
          aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
        }
      }
      else {
        size_t attr_offset = (step > center_step) ? step - 1 : step;
        float4 *motion_points = motion_keys->data_float4() + attr_offset * num_points;

        for (size_t j = 0; j < num_points; ++j) {
          const PointCloud::Point point = pointcloud->get_point(j);
          BoundBox bounds = BoundBox::empty;
          point.bounds_grow(motion_points[j], bounds);

          const size_t index = step * num_points + j;
          aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
          aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
        }
      }
    }
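
    /* Points are expressed as custom bounding-box primitives, resolved by an
     * intersection function (Metal provides no built-in point primitive). The
     * intersectionFunctionTableOffset values used throughout this file pick
     * the matching intersector: 0 = triangles, 1 = curves, 2 = points. */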

    MTLAccelerationStructureGeometryDescriptor *geomDesc;
    if (num_motion_steps > 1) {
      std::vector<MTLMotionKeyframeData *> aabb_ptrs;
      aabb_ptrs.reserve(num_motion_steps);
      for (size_t step = 0; step < num_motion_steps; ++step) {
        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
        k.buffer = aabbBuf;
        k.offset = step * num_points * sizeof(MTLAxisAlignedBoundingBox);
        aabb_ptrs.push_back(k);
      }

      MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
          [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
      geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
                                                              count:aabb_ptrs.size()];
      geomDescMotion.boundingBoxCount = num_points;
      geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
      geomDescMotion.intersectionFunctionTableOffset = 2;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescMotion.opaque = true;
      geomDesc = geomDescMotion;
    }
    else {
      MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
          [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
      geomDescNoMotion.boundingBoxBuffer = aabbBuf;
      geomDescNoMotion.boundingBoxBufferOffset = 0;
      geomDescNoMotion.boundingBoxCount = int(num_aabbs);
      geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
      geomDescNoMotion.intersectionFunctionTableOffset = 2;

      /* Force a single any-hit call, so shadow record-all behavior works correctly */
      /* (Match optix behavior: unsigned int build_flags =
       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
      geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
      geomDescNoMotion.opaque = true;
      geomDesc = geomDescNoMotion;
    }

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];

    if (num_motion_steps > 1) {
      accelDesc.motionStartTime = 0.0f;
      accelDesc.motionEndTime = 1.0f;
      // accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
      // accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
      accelDesc.motionKeyframeCount = num_motion_steps;

      BVH_status("Building motion pointcloud BLAS | %7d points | %s | %7d motion keyframes",
                 (int)pointcloud->num_points(),
                 geom->name.c_str(),
                 (int)num_motion_steps);
    }
    else {
      BVH_status("Building pointcloud BLAS | %7d points | %s",
                 (int)pointcloud->num_points(),
                 geom->name.c_str());
    }
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                    options:MTLResourceStorageModeShared];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel_uncompressed
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel_uncompressed
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    if (use_fast_trace_bvh) {
      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
                                               toBuffer:sizeBuf
                                                 offset:0
                                           sizeDataType:MTLDataTypeULong];
    }
    [accelEnc endEncoding];

    /* Estimated size of resources that will be wired for the GPU accelerated build.
     * Acceleration-struct size is doubled to account for possible compaction step. */
    size_t wired_size = aabbBuf.allocatedSize + scratchBuf.allocatedSize +
                        accel_uncompressed.allocatedSize * 2;

    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
      /* free temp resources */
      [scratchBuf release];
      [aabbBuf release];

      if (use_fast_trace_bvh) {
        /* Compact the accel structure */
        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;

        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
          id<MTLAccelerationStructureCommandEncoder> accelEnc =
              [accelCommands accelerationStructureCommandEncoder];
          id<MTLAccelerationStructure> accel = [mtl_device
              newAccelerationStructureWithSize:compressed_size];
          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                toAccelerationStructure:accel];
          [accelEnc endEncoding];
          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
            set_accel_struct(accel);
            [accel_uncompressed release];

            /* Signal that we've finished doing GPU acceleration struct build. */
            g_bvh_build_throttler.release(wired_size);
          }];
          [accelCommands commit];
        });
      }
      else {
        /* set our acceleration structure to the uncompressed structure */
        set_accel_struct(accel_uncompressed);

        /* Signal that we've finished doing GPU acceleration struct build. */
        g_bvh_build_throttler.release(wired_size);
      }

      [sizeBuf release];
    }];

    /* Wait until it's safe to proceed with GPU acceleration struct build. */
    g_bvh_build_throttler.acquire(wired_size);
    [accelCommands commit];
    return true;
  }
  return false;
}

bool BVHMetal::build_BLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  assert(objects.size() == 1 && geometry.size() == 1);

  /* Build bottom level acceleration structures (BLAS) */
  Geometry *const geom = geometry[0];
  switch (geom->geometry_type) {
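    /* Volumes take the mesh path: in Cycles a volume's bounds are represented
     * as a triangle mesh, so its BLAS is built from triangle primitives. */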
    case Geometry::VOLUME:
    case Geometry::MESH:
      return build_BLAS_mesh(progress, mtl_device, queue, geom, refit);
    case Geometry::HAIR:
      return build_BLAS_hair(progress, mtl_device, queue, geom, refit);
    case Geometry::POINTCLOUD:
      return build_BLAS_pointcloud(progress, mtl_device, queue, geom, refit);
    default:
      return false;
  }
  return false;
}

#  if defined(MAC_OS_VERSION_15_0)

/* Return MTLComponentTransform from a DecomposedTransform. */
static MTLComponentTransform decomposed_to_component_transform(const DecomposedTransform &src)
{
  MTLComponentTransform tfm;
  tfm.scale = MTLPackedFloat3Make(src.y.w, src.z.w, src.w.w);
  tfm.shear = MTLPackedFloat3Make(src.z.x, src.z.y, src.w.x);
  tfm.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  tfm.rotation = MTLPackedFloatQuaternionMake(src.x.x, src.x.y, src.x.z, src.x.w);
  tfm.translation = MTLPackedFloat3Make(src.y.x, src.y.y, src.y.z);
  return tfm;
}
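
/* Field mapping above, following Cycles' DecomposedTransform packing: the
 * rotation quaternion lives in x, the translation in y.xyz, the scale
 * diagonal in (y.w, z.w, w.w) and the shear terms in (z.x, z.y, w.x). The
 * pivot is unused and left at the origin. */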

/* Return unit MTLComponentTransform. */
static MTLComponentTransform component_transform_make_unit()
{
  MTLComponentTransform tfm;
  tfm.scale = MTLPackedFloat3Make(1.0f, 1.0f, 1.0f);
  tfm.shear = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  tfm.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  tfm.rotation = MTLPackedFloatQuaternionMake(0.0f, 0.0f, 0.0f, 1.0f);
  tfm.translation = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f);
  return tfm;
}

#  endif

bool BVHMetal::build_TLAS(Progress &progress,
                          id<MTLDevice> mtl_device,
                          id<MTLCommandQueue> queue,
                          bool refit)
{
  /* Wait for all BLAS builds to finish. */
  g_bvh_build_throttler.wait_for_all();

  if (@available(macos 12.0, *)) {
    /* Defined inside available check, for return type to be available. */
    auto make_null_BLAS = [](id<MTLDevice> mtl_device,
                             id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
      id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3)
                                                      options:MTLResourceStorageModeShared];

      /* Create an acceleration structure. */
      MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
          [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
      geomDesc.vertexBuffer = nullBuf;
      geomDesc.vertexBufferOffset = 0;
      geomDesc.vertexStride = sizeof(float3);
      geomDesc.indexBuffer = nullBuf;
      geomDesc.indexBufferOffset = 0;
      geomDesc.indexType = MTLIndexTypeUInt32;
      geomDesc.triangleCount = 0;
      geomDesc.intersectionFunctionTableOffset = 0;
      geomDesc.opaque = true;
      geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

      MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
          [MTLPrimitiveAccelerationStructureDescriptor descriptor];
      accelDesc.geometryDescriptors = @[ geomDesc ];
      accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

      MTLAccelerationStructureSizes accelSizes = [mtl_device
          accelerationStructureSizesWithDescriptor:accelDesc];
      id<MTLAccelerationStructure> accel_struct = [mtl_device
          newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
      id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                         options:MTLResourceStorageModePrivate];
      id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
                                                      options:MTLResourceStorageModeShared];
      id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
      id<MTLAccelerationStructureCommandEncoder> accelEnc =
          [accelCommands accelerationStructureCommandEncoder];
      [accelEnc buildAccelerationStructure:accel_struct
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
      [accelEnc endEncoding];
      [accelCommands commit];
      [accelCommands waitUntilCompleted];

      /* free temp resources */
      [scratchBuf release];
      [nullBuf release];
      [sizeBuf release];

      return accel_struct;
    };

    uint32_t num_instances = 0;
    uint32_t num_motion_transforms = 0;
    uint32_t num_motion_instances = 0;
    for (Object *ob : objects) {
      num_instances++;

      if (ob->use_motion()) {
        num_motion_transforms += max((size_t)1, ob->get_motion().size());
        num_motion_instances++;
      }
      else {
        num_motion_transforms++;
      }
    }
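
    /* A motion TLAS needs transform entries for every instance: objects with
     * motion contribute one transform per motion key, while static objects
     * still contribute a single entry (consumed below with
     * motionTransformsCount = 1). */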

    if (num_instances == 0) {
      return false;
    }

    const bool use_instance_motion = motion_blur && num_motion_instances;
    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas();

    NSMutableArray *all_blas = [NSMutableArray array];
    unordered_map<const BVHMetal *, int> instance_mapping;

    /* Lambda function to build/retrieve the BLAS index mapping */
    auto get_blas_index = [&](const BVHMetal *blas) {
      auto it = instance_mapping.find(blas);
      if (it != instance_mapping.end()) {
        return it->second;
      }
      int blas_index = (int)[all_blas count];
      instance_mapping[blas] = blas_index;
      if (@available(macos 12.0, *)) {
        [all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
      }
      return blas_index;
    };

    size_t instance_size;
    if (use_instance_motion) {
      instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
    }
    else {
      instance_size = sizeof(MTLAccelerationStructureUserIDInstanceDescriptor);
    }

    /* Allocate a GPU buffer for the instance data and populate it */
    id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
                                                        options:MTLResourceStorageModeShared];
    id<MTLBuffer> motion_transforms_buf = nil;
    MTLPackedFloat4x3 *matrix_motion_transforms = nullptr;
#  if defined(MAC_OS_VERSION_15_0)
    MTLComponentTransform *decomposed_motion_transforms = nullptr;
#  endif
    if (use_instance_motion && num_motion_transforms) {
#  if defined(MAC_OS_VERSION_15_0)
      if (use_pcmi) {
        if (@available(macos 15.0, *)) {
          motion_transforms_buf = [mtl_device
              newBufferWithLength:num_motion_transforms * sizeof(MTLComponentTransform)
                          options:MTLResourceStorageModeShared];
          decomposed_motion_transforms = (MTLComponentTransform *)motion_transforms_buf.contents;
        }
      }
      else
#  endif
      {
        motion_transforms_buf = [mtl_device
            newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                        options:MTLResourceStorageModeShared];
        matrix_motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
      }
    }

    uint32_t instance_index = 0;
    uint32_t motion_transform_index = 0;

    blas_array.clear();
    blas_array.reserve(num_instances);

    for (Object *ob : objects) {
      /* Skip non-traceable objects */
      const Geometry *geom = ob->get_geometry();
      const BVHMetal *blas = static_cast<const BVHMetal *>(geom->bvh.get());
      if (!blas || !blas->accel_struct || !ob->is_traceable()) {
        /* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
         * in our intersection functions */
        blas = nullptr;

        /* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
         * the descriptor. */
        if (!null_BLAS) {
          null_BLAS = make_null_BLAS(mtl_device, queue);
        }
        blas_array.push_back(null_BLAS);
      }
      else {
        blas_array.push_back(blas->accel_struct);
      }

      uint32_t accel_struct_index = get_blas_index(blas);

      /* Add some of the object visibility bits to the mask.
       * __prim_visibility contains the combined visibility bits of all instances, so it is not
       * reliable if they differ between instances. */
      uint32_t mask = ob->visibility_for_tracing();

      /* Have to have at least one bit in the mask, or else the instance would always be culled. */
      if (0 == mask) {
        mask = 0xFF;
      }

      /* The user instance ID carries the geometry's primitive offset. */
      uint32_t primitive_offset = 0;
      int currIndex = instance_index++;

      if (geom->is_hair()) {
        /* Build BLAS for curve primitives. */
        Hair *const hair = static_cast<Hair *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(hair->curve_segment_offset);
      }
      else if (geom->is_mesh() || geom->is_volume()) {
        /* Build BLAS for triangle primitives. */
        Mesh *const mesh = static_cast<Mesh *const>(const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(mesh->prim_offset);
      }
      else if (geom->is_pointcloud()) {
        /* Build BLAS for points primitives. */
        PointCloud *const pointcloud = static_cast<PointCloud *const>(
            const_cast<Geometry *>(geom));
        primitive_offset = uint32_t(pointcloud->prim_offset);
      }

      /* Bake into the appropriate descriptor */
      if (use_instance_motion) {
        MTLAccelerationStructureMotionInstanceDescriptor *instances =
            (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.motionStartTime = 0.0f;
        desc.motionEndTime = 1.0f;
        desc.motionTransformsStartIndex = motion_transform_index;
        desc.motionStartBorderMode = MTLMotionBorderModeVanish;
        desc.motionEndBorderMode = MTLMotionBorderModeVanish;
        desc.intersectionFunctionTableOffset = 0;

        array<DecomposedTransform> decomp(ob->get_motion().size());
        transform_motion_decompose(
            decomp.data(), ob->get_motion().data(), ob->get_motion().size());

        int key_count = ob->get_motion().size();
        if (key_count) {
          desc.motionTransformsCount = key_count;

#  if defined(MAC_OS_VERSION_15_0)
          if (use_pcmi) {
            for (int i = 0; i < key_count; i++) {
              decomposed_motion_transforms[motion_transform_index++] =
                  decomposed_to_component_transform(decomp[i]);
            }
          }
          else
#  endif
          {
            Transform *keys = ob->get_motion().data();
            for (int i = 0; i < key_count; i++) {
              float *t = (float *)&matrix_motion_transforms[motion_transform_index++];
              /* Transpose transform */
              const auto *src = (const float *)&keys[i];
              for (int j = 0; j < 12; j++) {
                t[j] = src[(j / 3) + 4 * (j % 3)];
              }
            }
          }
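
          /* Index math above: a Cycles Transform is a row-major 3x4 matrix
           * (three float4 rows), while MTLPackedFloat4x3 is column-major
           * (four float3 columns). Destination element n is column n / 3,
           * row n % 3, hence src[(n / 3) + 4 * (n % 3)]. */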
        }
        else {
          desc.motionTransformsCount = 1;

#  if defined(MAC_OS_VERSION_15_0)
          if (use_pcmi) {
            if (ob->get_geometry()->is_instanced()) {
              decomposed_motion_transforms[motion_transform_index++] =
                  decomposed_to_component_transform(decomp[0]);
            }
            else {
              decomposed_motion_transforms[motion_transform_index++] =
                  component_transform_make_unit();
            }
          }
          else
#  endif
          {
            float *t = (float *)&matrix_motion_transforms[motion_transform_index++];
            if (ob->get_geometry()->is_instanced()) {
              /* Transpose transform */
              const auto *src = (const float *)&ob->get_tfm();
              for (int i = 0; i < 12; i++) {
                t[i] = src[(i / 3) + 4 * (i % 3)];
              }
            }
            else {
              /* Clear transform to identity matrix */
              t[0] = t[4] = t[8] = 1.0f;
            }
          }
        }
      }
      else {
        MTLAccelerationStructureUserIDInstanceDescriptor *instances =
            (MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
        MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[currIndex];

        desc.accelerationStructureIndex = accel_struct_index;
        desc.userID = primitive_offset;
        desc.mask = mask;
        desc.intersectionFunctionTableOffset = 0;
        desc.options = MTLAccelerationStructureInstanceOptionOpaque;

        float *t = (float *)&desc.transformationMatrix;
        if (ob->get_geometry()->is_instanced()) {
          /* Transpose transform */
          const auto *src = (const float *)&ob->get_tfm();
          for (int i = 0; i < 12; i++) {
            t[i] = src[(i / 3) + 4 * (i % 3)];
          }
        }
        else {
          /* Clear transform to identity matrix */
          t[0] = t[4] = t[8] = 1.0f;
        }
      }
    }

    if (use_instance_motion) {
      BVH_status(
          "Building motion TLAS | %7d instances | %7d motion instances | %7d motion "
          "transforms",
          (int)num_instances,
          (int)num_motion_instances,
          (int)num_motion_transforms);
    }
    else {
      BVH_status("Building TLAS | %7d instances", (int)num_instances);
    }

    MTLInstanceAccelerationStructureDescriptor *accelDesc =
        [MTLInstanceAccelerationStructureDescriptor descriptor];
    accelDesc.instanceCount = num_instances;
    accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeUserID;
    accelDesc.instanceDescriptorBuffer = instanceBuf;
    accelDesc.instanceDescriptorBufferOffset = 0;
    accelDesc.instanceDescriptorStride = instance_size;
    accelDesc.instancedAccelerationStructures = all_blas;

    if (use_instance_motion) {
      accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion;
      accelDesc.motionTransformBuffer = motion_transforms_buf;
      accelDesc.motionTransformCount = num_motion_transforms;
#  if defined(MAC_OS_VERSION_15_0)
      if (@available(macos 15.0, *)) {
        accelDesc.motionTransformStride = 0;
        accelDesc.motionTransformType = use_pcmi ? MTLTransformTypeComponent :
                                                   MTLTransformTypePackedFloat4x3;
      }
#  endif
    }

    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
    if (!use_fast_trace_bvh) {
      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
                          MTLAccelerationStructureUsagePreferFastBuild);
    }

    MTLAccelerationStructureSizes accelSizes = [mtl_device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel = [mtl_device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                       options:MTLResourceStorageModePrivate];
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    if (refit) {
      [accelEnc refitAccelerationStructure:accel_struct
                                descriptor:accelDesc
                               destination:accel
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    else {
      [accelEnc buildAccelerationStructure:accel
                                descriptor:accelDesc
                             scratchBuffer:scratchBuf
                       scratchBufferOffset:0];
    }
    [accelEnc endEncoding];
    [accelCommands commit];
    [accelCommands waitUntilCompleted];

    if (motion_transforms_buf) {
      [motion_transforms_buf release];
    }
    [instanceBuf release];
    [scratchBuf release];

    /* Cache top and bottom-level acceleration structs */
    set_accel_struct(accel);

    unique_blas_array.clear();
    unique_blas_array.reserve(all_blas.count);
    [all_blas enumerateObjectsUsingBlock:^(id<MTLAccelerationStructure> blas, NSUInteger, BOOL *) {
      unique_blas_array.push_back(blas);
    }];

    return true;
  }
  return false;
}

bool BVHMetal::build(Progress &progress,
                     id<MTLDevice> mtl_device,
                     id<MTLCommandQueue> queue,
                     bool refit)
{
  if (@available(macos 12.0, *)) {
    if (refit) {
      /* It isn't valid to refit a non-existent BVH, or one which wasn't constructed as dynamic.
       * In such cases, assert in development but try to recover in the wild. */
      if (params.bvh_type != BVH_TYPE_DYNAMIC) {
        assert(!"Can't refit static Metal BVH");
        refit = false;
      }
      else if (!accel_struct) {
        assert(!"Can't refit non-existing Metal BVH");
        refit = false;
      }
    }

    if (!refit) {
      set_accel_struct(nil);
    }
  }

  if (!support_refit_blas()) {
    refit = false;
  }

  @autoreleasepool {
    if (!params.top_level) {
      return build_BLAS(progress, mtl_device, queue, refit);
    }
    return build_TLAS(progress, mtl_device, queue, refit);
  }
}

CCL_NAMESPACE_END

#endif /* WITH_METAL */