diff --git a/intern/cycles/bvh/optix.cpp b/intern/cycles/bvh/optix.cpp
index 56b4a5aa1d6..8c8259c9722 100644
--- a/intern/cycles/bvh/optix.cpp
+++ b/intern/cycles/bvh/optix.cpp
@@ -29,7 +29,7 @@ BVHOptiX::~BVHOptiX()
 {
   /* Acceleration structure memory is delayed freed on device, since deleting the
    * BVH may happen while still being used for rendering. */
-  device->release_optix_bvh(this);
+  device->release_bvh(this);
 }

 CCL_NAMESPACE_END
diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h
index deefe0404d2..5260de1b63b 100644
--- a/intern/cycles/device/device.h
+++ b/intern/cycles/device/device.h
@@ -217,11 +217,10 @@ class Device {
   /* Get OpenShadingLanguage memory buffer. */
   virtual void *get_cpu_osl_memory();

-  /* acceleration structure building */
+  /* Acceleration structure building. */
   virtual void build_bvh(BVH *bvh, Progress &progress, bool refit);
-
-  /* OptiX specific destructor. */
-  virtual void release_optix_bvh(BVH * /*bvh*/){};
+  /* Used by Metal and OptiX. */
+  virtual void release_bvh(BVH * /*bvh*/) {}

   /* multi device */
   virtual int device_number(Device * /*sub_device*/)
diff --git a/intern/cycles/device/metal/bvh.h b/intern/cycles/device/metal/bvh.h
index 46c810c9b72..44fe4c4cde2 100644
--- a/intern/cycles/device/metal/bvh.h
+++ b/intern/cycles/device/metal/bvh.h
@@ -28,9 +28,9 @@ class BVHMetal : public BVH {
   API_AVAILABLE(macos(11.0))
   vector<id<MTLAccelerationStructure>> unique_blas_array;

-  bool motion_blur = false;
+  Device *device = nullptr;

-  Stats &stats;
+  bool motion_blur = false;

   bool build(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);

diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm
index 39565e65662..bf78f7d07f4 100644
--- a/intern/cycles/device/metal/bvh.mm
+++ b/intern/cycles/device/metal/bvh.mm
@@ -113,15 +113,18 @@ BVHMetal::BVHMetal(const BVHParams &params_,
                    const vector<Geometry *> &geometry_,
                    const vector<Object *> &objects_,
                    Device *device)
-    : BVH(params_, geometry_, objects_), stats(device->stats)
+    : BVH(params_, geometry_, objects_), device(device)
 {
 }

 BVHMetal::~BVHMetal()
 {
+  /* Clear pointer used by enqueuing. */
+  device->release_bvh(this);
+
   if (@available(macos 12.0, *)) {
     if (accel_struct) {
-      stats.mem_free(accel_struct.allocatedSize);
+      device->stats.mem_free(accel_struct.allocatedSize);

       [accel_struct release];
     }
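The pattern introduced above is worth spelling out: the base Device now exposes a single backend-agnostic release_bvh() hook that defaults to a no-op, and the BVH destructors (BVHOptiX and BVHMetal, both shown above) call it so the owning device can drop cached pointers or defer frees before the BVH object disappears. A minimal standalone C++ sketch of that shape, using simplified stand-ins rather than the actual Cycles classes:

/* Minimal sketch: simplified stand-ins for Device / BVH, not the Cycles API. */
#include <cstdio>

struct BVH;

struct Device {
  virtual ~Device() = default;
  /* Default is a no-op; only backends that cache BVH pointers or delay
   * freeing device memory need to override this. */
  virtual void release_bvh(BVH * /*bvh*/) {}
};

struct BVH {
  Device *device = nullptr;
  explicit BVH(Device *device) : device(device) {}
  ~BVH()
  {
    /* Notify the owning device before this BVH goes away. */
    device->release_bvh(this);
  }
};

struct CachingDevice : Device {
  BVH *cached_bvh = nullptr; /* stands in for MetalDevice::bvhMetalRT */
  void release_bvh(BVH *bvh) override
  {
    if (cached_bvh == bvh) {
      cached_bvh = nullptr; /* avoid dangling after the BVH is destroyed */
    }
  }
};

int main()
{
  CachingDevice device;
  {
    BVH bvh(&device);
    device.cached_bvh = &bvh;
  } /* ~BVH() runs here and clears the device's cached pointer. */
  std::printf("cached_bvh after destruction: %p\n", (void *)device.cached_bvh);
  return 0;
}

In the real patch the two overriding backends are MetalDevice, which clears bvhMetalRT (see device_impl.mm below), and OptiXDevice, which queues the memory for delayed freeing.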
@@ -132,7 +135,7 @@ BVHMetal::~BVHMetal()
 }

 bool BVHMetal::build_BLAS_mesh(Progress &progress,
-                               id<MTLDevice> device,
+                               id<MTLDevice> mtl_device,
                                id<MTLCommandQueue> queue,
                                Geometry *const geom,
                                bool refit)
@@ -163,7 +166,7 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
   }

   MTLResourceOptions storage_mode;
-  if (device.hasUnifiedMemory) {
+  if (mtl_device.hasUnifiedMemory) {
     storage_mode = MTLResourceStorageModeShared;
   }
   else {
@@ -172,18 +175,19 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,

   /* Upload the mesh data to the GPU */
   id<MTLBuffer> posBuf = nil;
-  id<MTLBuffer> indexBuf = [device newBufferWithBytes:tris.data()
-                                               length:num_indices * sizeof(tris.data()[0])
-                                              options:storage_mode];
+  id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
+                                                   length:num_indices * sizeof(tris.data()[0])
+                                                  options:storage_mode];

   if (num_motion_steps == 1) {
-    posBuf = [device newBufferWithBytes:verts.data()
-                                 length:num_verts * sizeof(verts.data()[0])
-                                options:storage_mode];
+    posBuf = [mtl_device newBufferWithBytes:verts.data()
+                                     length:num_verts * sizeof(verts.data()[0])
+                                    options:storage_mode];
   }
   else {
-    posBuf = [device newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
-                                 options:storage_mode];
+    posBuf = [mtl_device
+        newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
+                    options:storage_mode];
     float3 *dest_data = (float3 *)[posBuf contents];
     size_t center_step = (num_motion_steps - 1) / 2;
     for (size_t step = 0; step < num_motion_steps; ++step) {
@@ -264,13 +268,14 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
                           MTLAccelerationStructureUsagePreferFastBuild);
     }

-    MTLAccelerationStructureSizes accelSizes = [device
+    MTLAccelerationStructureSizes accelSizes = [mtl_device
         accelerationStructureSizesWithDescriptor:accelDesc];
-    id<MTLAccelerationStructure> accel_uncompressed = [device
+    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
         newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
-    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
-                                                   options:MTLResourceStorageModePrivate];
-    id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
+    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
+                                                       options:MTLResourceStorageModePrivate];
+    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
+                                                    options:MTLResourceStorageModeShared];
     id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
     id<MTLAccelerationStructureCommandEncoder> accelEnc =
         [accelCommands accelerationStructureCommandEncoder];
@@ -314,14 +319,14 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
           id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
           id<MTLAccelerationStructureCommandEncoder> accelEnc =
               [accelCommands accelerationStructureCommandEncoder];
-          id<MTLAccelerationStructure> accel = [device
+          id<MTLAccelerationStructure> accel = [mtl_device
               newAccelerationStructureWithSize:compressed_size];
           [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                 toAccelerationStructure:accel];
           [accelEnc endEncoding];
           [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
             uint64_t allocated_size = [accel allocatedSize];
-            stats.mem_alloc(allocated_size);
+            device->stats.mem_alloc(allocated_size);
             accel_struct = accel;

             [accel_uncompressed release];
@@ -336,7 +341,7 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
       accel_struct = accel_uncompressed;

       uint64_t allocated_size = [accel_struct allocatedSize];
-      stats.mem_alloc(allocated_size);
+      device->stats.mem_alloc(allocated_size);

       /* Signal that we've finished doing GPU acceleration struct build. */
       g_bvh_build_throttler.release(wired_size);
@@ -355,7 +360,7 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
 }

 bool BVHMetal::build_BLAS_hair(Progress &progress,
-                               id<MTLDevice> device,
+                               id<MTLDevice> mtl_device,
                                id<MTLCommandQueue> queue,
                                Geometry *const geom,
                                bool refit)
@@ -382,7 +387,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
   }

   MTLResourceOptions storage_mode;
-  if (device.hasUnifiedMemory) {
+  if (mtl_device.hasUnifiedMemory) {
     storage_mode = MTLResourceStorageModeShared;
   }
   else {
@@ -445,18 +450,18 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
       }

       /* Allocate and populate MTLBuffers for geometry. */
-      idxBuffer = [device newBufferWithBytes:idxData.data()
-                                      length:idxData.size() * sizeof(int)
-                                     options:storage_mode];
+      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
+                                          length:idxData.size() * sizeof(int)
+                                         options:storage_mode];

-      cpBuffer = [device newBufferWithBytes:cpData.data()
-                                     length:cpData.size() * sizeof(float3)
-                                    options:storage_mode];
-
-      radiusBuffer = [device newBufferWithBytes:radiusData.data()
-                                         length:radiusData.size() * sizeof(float)
+      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
+                                         length:cpData.size() * sizeof(float3)
                                         options:storage_mode];

+      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
+                                             length:radiusData.size() * sizeof(float)
+                                            options:storage_mode];
+
       std::vector cp_ptrs;
       std::vector radius_ptrs;
       cp_ptrs.reserve(num_motion_steps);
@@ -541,18 +546,18 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
       }

       /* Allocate and populate MTLBuffers for geometry. */
-      idxBuffer = [device newBufferWithBytes:idxData.data()
-                                      length:idxData.size() * sizeof(int)
-                                     options:storage_mode];
+      idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
+                                          length:idxData.size() * sizeof(int)
+                                         options:storage_mode];

-      cpBuffer = [device newBufferWithBytes:cpData.data()
-                                     length:cpData.size() * sizeof(float3)
-                                    options:storage_mode];
-
-      radiusBuffer = [device newBufferWithBytes:radiusData.data()
-                                         length:radiusData.size() * sizeof(float)
+      cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
+                                         length:cpData.size() * sizeof(float3)
                                         options:storage_mode];

+      radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
+                                             length:radiusData.size() * sizeof(float)
+                                            options:storage_mode];
+
       if (storage_mode == MTLResourceStorageModeManaged) {
         [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
         [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
@@ -600,13 +605,14 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
     }
     accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

-    MTLAccelerationStructureSizes accelSizes = [device
+    MTLAccelerationStructureSizes accelSizes = [mtl_device
         accelerationStructureSizesWithDescriptor:accelDesc];
-    id<MTLAccelerationStructure> accel_uncompressed = [device
+    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
         newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
-    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
-                                                   options:MTLResourceStorageModePrivate];
-    id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
+    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
+                                                       options:MTLResourceStorageModePrivate];
+    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
+                                                    options:MTLResourceStorageModeShared];
     id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
     id<MTLAccelerationStructureCommandEncoder> accelEnc =
         [accelCommands accelerationStructureCommandEncoder];
@@ -651,14 +657,14 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
           id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
           id<MTLAccelerationStructureCommandEncoder> accelEnc =
               [accelCommands accelerationStructureCommandEncoder];
-          id<MTLAccelerationStructure> accel = [device
+          id<MTLAccelerationStructure> accel = [mtl_device
               newAccelerationStructureWithSize:compressed_size];
           [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                 toAccelerationStructure:accel];
           [accelEnc endEncoding];
           [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
             uint64_t allocated_size = [accel allocatedSize];
-            stats.mem_alloc(allocated_size);
+            device->stats.mem_alloc(allocated_size);
             accel_struct = accel;

             [accel_uncompressed release];
@@ -673,7 +679,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
       accel_struct = accel_uncompressed;

       uint64_t allocated_size = [accel_struct allocatedSize];
-      stats.mem_alloc(allocated_size);
+      device->stats.mem_alloc(allocated_size);

       /* Signal that we've finished doing GPU acceleration struct build. */
       g_bvh_build_throttler.release(wired_size);
@@ -690,7 +696,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
   }
 # else /* MAC_OS_VERSION_14_0 */
   (void)progress;
-  (void)device;
+  (void)mtl_device;
   (void)queue;
   (void)geom;
   (void)(refit);
@@ -699,7 +705,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
 }

 bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
-                                     id<MTLDevice> device,
+                                     id<MTLDevice> mtl_device,
                                      id<MTLCommandQueue> queue,
                                      Geometry *const geom,
                                      bool refit)
@@ -732,7 +738,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
   const size_t num_aabbs = num_motion_steps * num_points;

   MTLResourceOptions storage_mode;
-  if (device.hasUnifiedMemory) {
+  if (mtl_device.hasUnifiedMemory) {
     storage_mode = MTLResourceStorageModeShared;
   }
   else {
@@ -740,7 +746,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
   }

   /* Allocate a GPU buffer for the AABB data and populate it */
-  id<MTLBuffer> aabbBuf = [device
+  id<MTLBuffer> aabbBuf = [mtl_device
       newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
                   options:storage_mode];
   MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
@@ -848,13 +854,14 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
                           MTLAccelerationStructureUsagePreferFastBuild);
     }

-    MTLAccelerationStructureSizes accelSizes = [device
+    MTLAccelerationStructureSizes accelSizes = [mtl_device
         accelerationStructureSizesWithDescriptor:accelDesc];
-    id<MTLAccelerationStructure> accel_uncompressed = [device
+    id<MTLAccelerationStructure> accel_uncompressed = [mtl_device
         newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
-    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
-                                                   options:MTLResourceStorageModePrivate];
-    id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
+    id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
+                                                       options:MTLResourceStorageModePrivate];
+    id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
+                                                    options:MTLResourceStorageModeShared];
     id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
     id<MTLAccelerationStructureCommandEncoder> accelEnc =
         [accelCommands accelerationStructureCommandEncoder];
@@ -897,14 +904,14 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
           id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
           id<MTLAccelerationStructureCommandEncoder> accelEnc =
               [accelCommands accelerationStructureCommandEncoder];
-          id<MTLAccelerationStructure> accel = [device
+          id<MTLAccelerationStructure> accel = [mtl_device
               newAccelerationStructureWithSize:compressed_size];
           [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
                                 toAccelerationStructure:accel];
           [accelEnc endEncoding];
           [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
             uint64_t allocated_size = [accel allocatedSize];
-            stats.mem_alloc(allocated_size);
+            device->stats.mem_alloc(allocated_size);
             accel_struct = accel;

             [accel_uncompressed release];
@@ -919,7 +926,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
       accel_struct = accel_uncompressed;

       uint64_t allocated_size = [accel_struct allocatedSize];
-      stats.mem_alloc(allocated_size);
+      device->stats.mem_alloc(allocated_size);

       /* Signal that we've finished doing GPU acceleration struct build. */
       g_bvh_build_throttler.release(wired_size);
@@ -937,7 +944,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
 }

 bool BVHMetal::build_BLAS(Progress &progress,
-                          id<MTLDevice> device,
+                          id<MTLDevice> mtl_device,
                           id<MTLCommandQueue> queue,
                           bool refit)
 {
@@ -948,11 +955,11 @@ bool BVHMetal::build_BLAS(Progress &progress,
   switch (geom->geometry_type) {
     case Geometry::VOLUME:
     case Geometry::MESH:
-      return build_BLAS_mesh(progress, device, queue, geom, refit);
+      return build_BLAS_mesh(progress, mtl_device, queue, geom, refit);
     case Geometry::HAIR:
-      return build_BLAS_hair(progress, device, queue, geom, refit);
+      return build_BLAS_hair(progress, mtl_device, queue, geom, refit);
     case Geometry::POINTCLOUD:
-      return build_BLAS_pointcloud(progress, device, queue, geom, refit);
+      return build_BLAS_pointcloud(progress, mtl_device, queue, geom, refit);
     default:
       return false;
   }
@@ -960,7 +967,7 @@ bool BVHMetal::build_BLAS(Progress &progress,
 }

 bool BVHMetal::build_TLAS(Progress &progress,
-                          id<MTLDevice> device,
+                          id<MTLDevice> mtl_device,
                           id<MTLCommandQueue> queue,
                           bool refit)
 {
@@ -969,14 +976,14 @@ bool BVHMetal::build_TLAS(Progress &progress,

   if (@available(macos 12.0, *)) {
     /* Defined inside available check, for return type to be available. */
-    auto make_null_BLAS = [](id<MTLDevice> device,
+    auto make_null_BLAS = [](id<MTLDevice> mtl_device,
                              id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
       MTLResourceOptions storage_mode = MTLResourceStorageModeManaged;
-      if (device.hasUnifiedMemory) {
+      if (mtl_device.hasUnifiedMemory) {
         storage_mode = MTLResourceStorageModeShared;
       }

-      id<MTLBuffer> nullBuf = [device newBufferWithLength:sizeof(float3) options:storage_mode];
+      id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3) options:storage_mode];

       /* Create an acceleration structure. */
       MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
@@ -997,13 +1004,14 @@ bool BVHMetal::build_TLAS(Progress &progress,
       accelDesc.geometryDescriptors = @[ geomDesc ];
       accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits;

-      MTLAccelerationStructureSizes accelSizes = [device
+      MTLAccelerationStructureSizes accelSizes = [mtl_device
           accelerationStructureSizesWithDescriptor:accelDesc];
-      id<MTLAccelerationStructure> accel_struct = [device
+      id<MTLAccelerationStructure> accel_struct = [mtl_device
           newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
-      id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
-                                                     options:MTLResourceStorageModePrivate];
-      id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
+      id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
+                                                         options:MTLResourceStorageModePrivate];
+      id<MTLBuffer> sizeBuf = [mtl_device newBufferWithLength:8
+                                                      options:MTLResourceStorageModeShared];
       id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
       id<MTLAccelerationStructureCommandEncoder> accelEnc =
           [accelCommands accelerationStructureCommandEncoder];
@@ -1070,7 +1078,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
   };

   MTLResourceOptions storage_mode;
-  if (device.hasUnifiedMemory) {
+  if (mtl_device.hasUnifiedMemory) {
     storage_mode = MTLResourceStorageModeShared;
   }
   else {
@@ -1086,12 +1094,12 @@ bool BVHMetal::build_TLAS(Progress &progress,
   }

   /* Allocate a GPU buffer for the instance data and populate it */
-  id<MTLBuffer> instanceBuf = [device newBufferWithLength:num_instances * instance_size
-                                                  options:storage_mode];
+  id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
+                                                      options:storage_mode];
   id<MTLBuffer> motion_transforms_buf = nil;
   MTLPackedFloat4x3 *motion_transforms = nullptr;
   if (motion_blur && num_motion_transforms) {
-    motion_transforms_buf = [device
+    motion_transforms_buf = [mtl_device
         newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
                     options:storage_mode];
     motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
@@ -1108,14 +1116,14 @@ bool BVHMetal::build_TLAS(Progress &progress,
       Geometry const *geom = ob->get_geometry();
       BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
       if (!blas || !blas->accel_struct) {
         /* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
          * in our intersection functions */
         blas = nullptr;

         /* Workaround for issue in macOS <= 14.1: Insert degenerate BLAS instead of zero-filling
          * the descriptor. */
         if (!null_BLAS) {
-          null_BLAS = make_null_BLAS(device, queue);
+          null_BLAS = make_null_BLAS(mtl_device, queue);
         }
         blas_array.push_back(null_BLAS);
       }
@@ -1259,12 +1267,12 @@ bool BVHMetal::build_TLAS(Progress &progress,
                         MTLAccelerationStructureUsagePreferFastBuild);
   }

-  MTLAccelerationStructureSizes accelSizes = [device
+  MTLAccelerationStructureSizes accelSizes = [mtl_device
       accelerationStructureSizesWithDescriptor:accelDesc];
-  id<MTLAccelerationStructure> accel = [device
+  id<MTLAccelerationStructure> accel = [mtl_device
       newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
-  id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
-                                                 options:MTLResourceStorageModePrivate];
+  id<MTLBuffer> scratchBuf = [mtl_device newBufferWithLength:accelSizes.buildScratchBufferSize
+                                                     options:MTLResourceStorageModePrivate];
   id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
   id<MTLAccelerationStructureCommandEncoder> accelEnc =
       [accelCommands accelerationStructureCommandEncoder];
@@ -1292,7 +1300,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
     [scratchBuf release];

     uint64_t allocated_size = [accel allocatedSize];
-    stats.mem_alloc(allocated_size);
+    device->stats.mem_alloc(allocated_size);

     /* Cache top and bottom-level acceleration structs */
     accel_struct = accel;
@@ -1309,7 +1317,7 @@ bool BVHMetal::build_TLAS(Progress &progress,
 }

 bool BVHMetal::build(Progress &progress,
-                     id<MTLDevice> device,
+                     id<MTLDevice> mtl_device,
                      id<MTLCommandQueue> queue,
                      bool refit)
 {
@@ -1319,7 +1327,7 @@ bool BVHMetal::build(Progress &progress,
   }
   else {
     if (accel_struct) {
-      stats.mem_free(accel_struct.allocatedSize);
+      device->stats.mem_free(accel_struct.allocatedSize);
       [accel_struct release];
       accel_struct = nil;
     }
@@ -1328,10 +1336,10 @@ bool BVHMetal::build(Progress &progress,

   @autoreleasepool {
     if (!params.top_level) {
-      return build_BLAS(progress, device, queue, refit);
+      return build_BLAS(progress, mtl_device, queue, refit);
     }
     else {
-      return build_TLAS(progress, device, queue, refit);
+      return build_TLAS(progress, mtl_device, queue, refit);
     }
   }
 }
diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h
index 04a862d7785..c6e8407d266 100644
--- a/intern/cycles/device/metal/device_impl.h
+++ b/intern/cycles/device/metal/device_impl.h
@@ -140,6 +140,8 @@ class MetalDevice : public Device {

   virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;

+  virtual void release_bvh(BVH *bvh) override;
+
   virtual void optimize_for_scene(Scene *scene) override;

   static void compile_and_load(int device_id, MetalPipelineType pso_type);
diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm
index 319a348db7f..c97a8375f7d 100644
--- a/intern/cycles/device/metal/device_impl.mm
+++ b/intern/cycles/device/metal/device_impl.mm
@@ -1443,6 +1443,13 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
   }
 }

+void MetalDevice::release_bvh(BVH *bvh)
+{
+  if (bvhMetalRT == bvh) {
+    bvhMetalRT = nullptr;
+  }
+}
+
 CCL_NAMESPACE_END

 #endif
diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp
index be3f1c0ad3d..3b207e5110c 100644
--- a/intern/cycles/device/optix/device_impl.cpp
+++ b/intern/cycles/device/optix/device_impl.cpp
@@ -1680,7 +1680,7 @@ void OptiXDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
   }
 }

-void OptiXDevice::release_optix_bvh(BVH *bvh)
+void OptiXDevice::release_bvh(BVH *bvh)
 {
   thread_scoped_lock lock(delayed_free_bvh_mutex);
   /* Do delayed free of BVH memory, since geometry holding BVH might be deleted
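On the OptiX side, release_bvh() must not free device memory immediately: as the comment above notes, the geometry holding the BVH can be deleted while rendering is still in flight, so the memory is only queued under delayed_free_bvh_mutex and reclaimed later by free_bvh_memory_delayed(). A compilable sketch of that shape, with simplified stand-in types rather than the real OptiXDevice members:

/* Sketch only: hypothetical names, not the actual OptiXDevice implementation. */
#include <cstdio>
#include <cstddef>
#include <mutex>
#include <vector>

struct DeviceMemory {
  size_t size = 0;
};

class DelayedFreeDevice {
 public:
  /* Called from the BVH destructor, possibly while a render still runs. */
  void release_bvh(DeviceMemory *mem)
  {
    std::lock_guard<std::mutex> lock(delayed_free_mutex_);
    delayed_free_.push_back(*mem); /* queue the allocation, do not free yet */
    mem->size = 0;
  }

  /* Called at a safe point, e.g. before the next BVH build. */
  void free_bvh_memory_delayed()
  {
    std::lock_guard<std::mutex> lock(delayed_free_mutex_);
    for (const DeviceMemory &mem : delayed_free_) {
      std::printf("freeing %zu bytes of BVH memory\n", mem.size);
    }
    delayed_free_.clear();
  }

 private:
  std::mutex delayed_free_mutex_;
  std::vector<DeviceMemory> delayed_free_;
};

int main()
{
  DelayedFreeDevice device;
  DeviceMemory bvh_mem{1024};
  device.release_bvh(&bvh_mem);     /* geometry deleted mid-render: just queue it */
  device.free_bvh_memory_delayed(); /* later, actually reclaim the memory */
  return 0;
}

The Metal override shown above does not need such a queue, since BVHMetal's own destructor releases the acceleration structure; MetalDevice::release_bvh() only clears the cached bvhMetalRT pointer.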
diff --git a/intern/cycles/device/optix/device_impl.h b/intern/cycles/device/optix/device_impl.h
index 369e685f792..870983cb5a8 100644
--- a/intern/cycles/device/optix/device_impl.h
+++ b/intern/cycles/device/optix/device_impl.h
@@ -106,7 +106,7 @@ class OptiXDevice : public CUDADevice {

   void build_bvh(BVH *bvh, Progress &progress, bool refit) override;

-  void release_optix_bvh(BVH *bvh) override;
+  void release_bvh(BVH *bvh) override;
   void free_bvh_memory_delayed();

   void const_copy_to(const char *name, void *host, size_t size) override;
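A side effect of dropping the Stats &stats member from BVHMetal is that all memory accounting in bvh.mm now goes through device->stats instead. The bookkeeping itself is unchanged: mem_alloc() when an acceleration structure is (re)built, mem_free() of its allocatedSize when it is released. A small self-contained sketch of that accounting, using simplified Stats/Device stand-ins rather than the Cycles implementations:

/* Hedged sketch: simplified stand-ins, only to illustrate the stats flow. */
#include <atomic>
#include <cstddef>
#include <cstdio>

struct Stats {
  std::atomic<size_t> mem_used{0};
  void mem_alloc(size_t size) { mem_used += size; }
  void mem_free(size_t size) { mem_used -= size; }
};

struct Device {
  Stats stats;
};

int main()
{
  Device device;
  const size_t accel_size = 4096; /* pretend allocatedSize of an accel struct */

  /* Build: account for the acceleration structure through the device. */
  device.stats.mem_alloc(accel_size);
  /* Destroy: the same device pointer is used to release the accounting. */
  device.stats.mem_free(accel_size);

  std::printf("mem_used after build + free: %zu\n", device.stats.mem_used.load());
  return 0;
}

Routing the accounting through the one Device pointer keeps the BVH's two obligations on destruction (clear the device's cached pointer, update the device's stats) behind a single member.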