Cycles: Workaround MetalRT TLAS build hanging in some motion blur scenes

This PR works around an issue where zero-filled motion TLAS instance descriptors can cause unexpected hangs during downstream TLAS builds on M3. Instead of zeroing the descriptor we insert an explicit "null" BLAS, achieving the same result.

Pull Request: https://projects.blender.org/blender/blender/pulls/114544
This commit is contained in:
Michael Jones
2023-11-06 17:30:48 +01:00
committed by Michael Jones (Apple)
parent f1116f64bd
commit 4f52ab0b49
2 changed files with 74 additions and 15 deletions

View File

@@ -19,6 +19,9 @@ class BVHMetal : public BVH {
API_AVAILABLE(macos(11.0))
id<MTLAccelerationStructure> accel_struct = nil;
API_AVAILABLE(macos(11.0))
id<MTLAccelerationStructure> null_BLAS = nil;
API_AVAILABLE(macos(11.0))
vector<id<MTLAccelerationStructure>> blas_array;

View File

@@ -124,6 +124,66 @@ BVHMetal::~BVHMetal()
stats.mem_free(accel_struct.allocatedSize);
[accel_struct release];
}
if (null_BLAS) {
[null_BLAS release];
}
}
}
id<MTLAccelerationStructure> make_null_BLAS(id<MTLDevice> device, id<MTLCommandQueue> queue)
{
if (@available(macos 12.0, *)) {
MTLResourceOptions storage_mode = MTLResourceStorageModeManaged;
if (device.hasUnifiedMemory) {
storage_mode = MTLResourceStorageModeShared;
}
id<MTLBuffer> nullBuf = [device newBufferWithLength:0 options:storage_mode];
/* Create an acceleration structure. */
MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
[MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
geomDesc.vertexBuffer = nullBuf;
geomDesc.vertexBufferOffset = 0;
geomDesc.vertexStride = sizeof(float3);
geomDesc.indexBuffer = nullBuf;
geomDesc.indexBufferOffset = 0;
geomDesc.indexType = MTLIndexTypeUInt32;
geomDesc.triangleCount = 0;
geomDesc.intersectionFunctionTableOffset = 0;
geomDesc.opaque = true;
geomDesc.allowDuplicateIntersectionFunctionInvocation = false;
MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
[MTLPrimitiveAccelerationStructureDescriptor descriptor];
accelDesc.geometryDescriptors = @[ geomDesc ];
accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;
MTLAccelerationStructureSizes accelSizes = [device
accelerationStructureSizesWithDescriptor:accelDesc];
id<MTLAccelerationStructure> accel_struct = [device
newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
options:MTLResourceStorageModePrivate];
id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
id<MTLAccelerationStructureCommandEncoder> accelEnc =
[accelCommands accelerationStructureCommandEncoder];
[accelEnc buildAccelerationStructure:accel_struct
descriptor:accelDesc
scratchBuffer:scratchBuf
scratchBufferOffset:0];
[accelEnc endEncoding];
[accelCommands commit];
[accelCommands waitUntilCompleted];
/* free temp resources */
[scratchBuf release];
[nullBuf release];
[sizeBuf release];
return accel_struct;
}
}
@@ -1005,7 +1065,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
int blas_index = (int)[all_blas count];
instance_mapping[blas] = blas_index;
if (@available(macos 12.0, *)) {
[all_blas addObject:blas->accel_struct];
[all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
}
return blas_index;
}
@@ -1052,22 +1112,18 @@ bool BVHMetal::build_TLAS(Progress &progress,
if (!blas || !blas->accel_struct) {
/* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
* in our intersection functions */
if (motion_blur) {
MTLAccelerationStructureMotionInstanceDescriptor *instances =
(MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
memset(&desc, 0x00, sizeof(desc));
blas = nullptr;
/* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
* the descriptor. */
if (!null_BLAS) {
null_BLAS = make_null_BLAS(device, queue);
}
else {
MTLAccelerationStructureUserIDInstanceDescriptor *instances =
(MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
memset(&desc, 0x00, sizeof(desc));
}
blas_array.push_back(nil);
continue;
blas_array.push_back(null_BLAS);
}
else {
blas_array.push_back(blas->accel_struct);
}
blas_array.push_back(blas->accel_struct);
uint32_t accel_struct_index = get_blas_index(blas);