Cycles: Work around MetalRT TLAS build hanging in some motion blur scenes
This PR works around an issue where zero-filled motion TLAS instance descriptors can cause unexpected hangs during downstream TLAS builds on M3. Instead of zeroing the descriptor, we insert an explicit "null" BLAS (an acceleration structure built from zero triangles), achieving the same result. Pull Request: https://projects.blender.org/blender/blender/pulls/114544
This commit is contained in:
committed by
Michael Jones (Apple)
parent
f1116f64bd
commit
4f52ab0b49
@@ -19,6 +19,9 @@ class BVHMetal : public BVH {
|
||||
API_AVAILABLE(macos(11.0))
|
||||
id<MTLAccelerationStructure> accel_struct = nil;
|
||||
|
||||
API_AVAILABLE(macos(11.0))
|
||||
id<MTLAccelerationStructure> null_BLAS = nil;
|
||||
|
||||
API_AVAILABLE(macos(11.0))
|
||||
vector<id<MTLAccelerationStructure>> blas_array;
|
||||
|
||||
|
||||
@@ -124,6 +124,66 @@ BVHMetal::~BVHMetal()
|
||||
stats.mem_free(accel_struct.allocatedSize);
|
||||
[accel_struct release];
|
||||
}
|
||||
|
||||
if (null_BLAS) {
|
||||
[null_BLAS release];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Build an empty ("null") bottom-level acceleration structure.
 *
 * The structure is built from a single triangle geometry descriptor whose triangleCount is 0,
 * so it can stand in for a real BLAS wherever a valid acceleration structure object is
 * required but no geometry should be present.
 *
 * device: Metal device used to allocate the buffers and the acceleration structure.
 * queue:  Command queue on which the build is encoded. The call blocks until the submitted
 *         command buffer has completed.
 *
 * Returns the built acceleration structure. It is created by a `new...` call and is not
 * released here, so ownership passes to the caller. Returns nil when the macOS 12.0
 * availability check fails. */
id<MTLAccelerationStructure> make_null_BLAS(id<MTLDevice> device, id<MTLCommandQueue> queue)
{
  if (@available(macos 12.0, *)) {
    MTLResourceOptions storage_mode = MTLResourceStorageModeManaged;
    if (device.hasUnifiedMemory) {
      storage_mode = MTLResourceStorageModeShared;
    }

    /* Placeholder vertex/index storage. Metal does not accept zero-length buffer allocations
     * (see the MTLDevice newBufferWithLength:options: documentation), so reserve room for one
     * vertex. With triangleCount == 0 the contents are never consumed by the build. */
    id<MTLBuffer> nullBuf = [device newBufferWithLength:sizeof(float3) options:storage_mode];

    /* Describe a triangle geometry that contains no triangles. */
    MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
        [MTLAccelerationStructureTriangleGeometryDescriptor descriptor];
    geomDesc.vertexBuffer = nullBuf;
    geomDesc.vertexBufferOffset = 0;
    geomDesc.vertexStride = sizeof(float3);
    geomDesc.indexBuffer = nullBuf;
    geomDesc.indexBufferOffset = 0;
    geomDesc.indexType = MTLIndexTypeUInt32;
    geomDesc.triangleCount = 0;
    geomDesc.intersectionFunctionTableOffset = 0;
    geomDesc.opaque = true;
    geomDesc.allowDuplicateIntersectionFunctionInvocation = false;

    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
    accelDesc.geometryDescriptors = @[ geomDesc ];
    accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

    /* Allocate the acceleration structure and the scratch memory needed to build it. */
    MTLAccelerationStructureSizes accelSizes = [device
        accelerationStructureSizesWithDescriptor:accelDesc];
    id<MTLAccelerationStructure> accel_struct = [device
        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
                                                   options:MTLResourceStorageModePrivate];

    /* Encode the build and wait for it, so the temporaries can be released right away. */
    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
    id<MTLAccelerationStructureCommandEncoder> accelEnc =
        [accelCommands accelerationStructureCommandEncoder];
    [accelEnc buildAccelerationStructure:accel_struct
                              descriptor:accelDesc
                           scratchBuffer:scratchBuf
                     scratchBufferOffset:0];
    [accelEnc endEncoding];
    [accelCommands commit];
    [accelCommands waitUntilCompleted];

    /* Free temporary resources. */
    [scratchBuf release];
    [nullBuf release];

    return accel_struct;
  }

  /* Availability check failed: make the "no acceleration structure" result explicit instead
   * of flowing off the end of a non-void function. */
  return nil;
}
|
||||
|
||||
@@ -1005,7 +1065,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
||||
int blas_index = (int)[all_blas count];
|
||||
instance_mapping[blas] = blas_index;
|
||||
if (@available(macos 12.0, *)) {
|
||||
[all_blas addObject:blas->accel_struct];
|
||||
[all_blas addObject:(blas ? blas->accel_struct : null_BLAS)];
|
||||
}
|
||||
return blas_index;
|
||||
}
|
||||
@@ -1052,22 +1112,18 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
||||
if (!blas || !blas->accel_struct) {
|
||||
/* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
|
||||
* in our intersection functions */
|
||||
if (motion_blur) {
|
||||
MTLAccelerationStructureMotionInstanceDescriptor *instances =
|
||||
(MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents];
|
||||
MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[instance_index++];
|
||||
memset(&desc, 0x00, sizeof(desc));
|
||||
blas = nullptr;
|
||||
|
||||
/* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
|
||||
* the descriptor. */
|
||||
if (!null_BLAS) {
|
||||
null_BLAS = make_null_BLAS(device, queue);
|
||||
}
|
||||
else {
|
||||
MTLAccelerationStructureUserIDInstanceDescriptor *instances =
|
||||
(MTLAccelerationStructureUserIDInstanceDescriptor *)[instanceBuf contents];
|
||||
MTLAccelerationStructureUserIDInstanceDescriptor &desc = instances[instance_index++];
|
||||
memset(&desc, 0x00, sizeof(desc));
|
||||
}
|
||||
blas_array.push_back(nil);
|
||||
continue;
|
||||
blas_array.push_back(null_BLAS);
|
||||
}
|
||||
else {
|
||||
blas_array.push_back(blas->accel_struct);
|
||||
}
|
||||
blas_array.push_back(blas->accel_struct);
|
||||
|
||||
uint32_t accel_struct_index = get_blas_index(blas);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user