From 1a93dfe4fcea17fa8bed998e0e66b94de585f6fa Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 18 Mar 2025 19:09:25 +0100 Subject: [PATCH] Cycles: Apple Silicon tidy: Remove non-UMA codepaths This PR removes a bunch of dead code following #123551 (removal of AMD and Intel GPU support). It is safe to assume that UMA will be available, so a lot of codepaths that dealt with copying between CPU and GPU are now just clutter. Pull Request: https://projects.blender.org/blender/blender/pulls/136117 --- intern/cycles/device/metal/bvh.mm | 90 +++----------- intern/cycles/device/metal/device_impl.h | 2 - intern/cycles/device/metal/device_impl.mm | 137 +++++----------------- intern/cycles/device/metal/queue.h | 7 -- intern/cycles/device/metal/queue.mm | 101 +--------------- intern/cycles/device/metal/util.h | 1 - intern/cycles/device/metal/util.mm | 20 +--- 7 files changed, 51 insertions(+), 307 deletions(-) diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm index c25727c830e..178d6d49002 100644 --- a/intern/cycles/device/metal/bvh.mm +++ b/intern/cycles/device/metal/bvh.mm @@ -196,29 +196,21 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress, num_motion_steps = mesh->get_motion_steps(); } - MTLResourceOptions storage_mode; - if (mtl_device.hasUnifiedMemory) { - storage_mode = MTLResourceStorageModeShared; - } - else { - storage_mode = MTLResourceStorageModeManaged; - } - /* Upload the mesh data to the GPU */ id posBuf = nil; id indexBuf = [mtl_device newBufferWithBytes:tris.data() length:num_indices * sizeof(tris.data()[0]) - options:storage_mode]; + options:MTLResourceStorageModeShared]; if (num_motion_steps == 1) { posBuf = [mtl_device newBufferWithBytes:verts.data() length:num_verts * sizeof(verts.data()[0]) - options:storage_mode]; + options:MTLResourceStorageModeShared]; } else { posBuf = [mtl_device newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0]) - options:storage_mode]; + options:MTLResourceStorageModeShared]; float3 *dest_data = (float3 *)[posBuf contents]; size_t center_step = (num_motion_steps - 1) / 2; for (size_t step = 0; step < num_motion_steps; ++step) { @@ -230,9 +222,6 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress, } std::copy_n(verts, num_verts, dest_data + num_verts * step); } - if (storage_mode == MTLResourceStorageModeManaged) { - [posBuf didModifyRange:NSMakeRange(0, posBuf.length)]; - } } /* Create an acceleration structure. */ @@ -412,14 +401,6 @@ bool BVHMetal::build_BLAS_hair(Progress &progress, num_motion_steps = hair->get_motion_steps(); } - MTLResourceOptions storage_mode; - if (mtl_device.hasUnifiedMemory) { - storage_mode = MTLResourceStorageModeShared; - } - else { - storage_mode = MTLResourceStorageModeManaged; - } - id cpBuffer = nil; id radiusBuffer = nil; id idxBuffer = nil; @@ -478,15 +459,15 @@ bool BVHMetal::build_BLAS_hair(Progress &progress, /* Allocate and populate MTLBuffers for geometry. */ idxBuffer = [mtl_device newBufferWithBytes:idxData.data() length:idxData.size() * sizeof(int) - options:storage_mode]; + options:MTLResourceStorageModeShared]; cpBuffer = [mtl_device newBufferWithBytes:cpData.data() length:cpData.size() * sizeof(float3) - options:storage_mode]; + options:MTLResourceStorageModeShared]; radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data() length:radiusData.size() * sizeof(float) - options:storage_mode]; + options:MTLResourceStorageModeShared]; std::vector cp_ptrs; std::vector radius_ptrs; @@ -505,12 +486,6 @@ bool BVHMetal::build_BLAS_hair(Progress &progress, radius_ptrs.push_back(k); } - if (storage_mode == MTLResourceStorageModeManaged) { - [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)]; - [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)]; - [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)]; - } - geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data() count:cp_ptrs.size()]; geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data() @@ -574,21 +549,16 @@ bool BVHMetal::build_BLAS_hair(Progress &progress, /* Allocate and populate MTLBuffers for geometry. */ idxBuffer = [mtl_device newBufferWithBytes:idxData.data() length:idxData.size() * sizeof(int) - options:storage_mode]; + options:MTLResourceStorageModeShared]; cpBuffer = [mtl_device newBufferWithBytes:cpData.data() length:cpData.size() * sizeof(float3) - options:storage_mode]; + options:MTLResourceStorageModeShared]; radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data() length:radiusData.size() * sizeof(float) - options:storage_mode]; + options:MTLResourceStorageModeShared]; - if (storage_mode == MTLResourceStorageModeManaged) { - [cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)]; - [idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)]; - [radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)]; - } geomDescCrv.controlPointBuffer = cpBuffer; geomDescCrv.radiusBuffer = radiusBuffer; geomDescCrv.controlPointCount = cpData.size(); @@ -758,18 +728,10 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress, const size_t num_aabbs = num_motion_steps * num_points; - MTLResourceOptions storage_mode; - if (mtl_device.hasUnifiedMemory) { - storage_mode = MTLResourceStorageModeShared; - } - else { - storage_mode = MTLResourceStorageModeManaged; - } - /* Allocate a GPU buffer for the AABB data and populate it */ id aabbBuf = [mtl_device newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox) - options:storage_mode]; + options:MTLResourceStorageModeShared]; MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents]; /* Get AABBs for each motion step */ @@ -803,10 +765,6 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress, } } - if (storage_mode == MTLResourceStorageModeManaged) { - [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)]; - } - MTLAccelerationStructureGeometryDescriptor *geomDesc; if (motion_blur) { std::vector aabb_ptrs; @@ -987,12 +945,8 @@ bool BVHMetal::build_TLAS(Progress &progress, /* Defined inside available check, for return type to be available. */ auto make_null_BLAS = [](id mtl_device, id queue) -> id { - MTLResourceOptions storage_mode = MTLResourceStorageModeManaged; - if (mtl_device.hasUnifiedMemory) { - storage_mode = MTLResourceStorageModeShared; - } - - id nullBuf = [mtl_device newBufferWithLength:sizeof(float3) options:storage_mode]; + id nullBuf = [mtl_device newBufferWithLength:sizeof(float3) + options:MTLResourceStorageModeShared]; /* Create an acceleration structure. */ MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc = @@ -1080,14 +1034,6 @@ bool BVHMetal::build_TLAS(Progress &progress, return blas_index; }; - MTLResourceOptions storage_mode; - if (mtl_device.hasUnifiedMemory) { - storage_mode = MTLResourceStorageModeShared; - } - else { - storage_mode = MTLResourceStorageModeManaged; - } - size_t instance_size; if (motion_blur) { instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor); @@ -1098,13 +1044,13 @@ bool BVHMetal::build_TLAS(Progress &progress, /* Allocate a GPU buffer for the instance data and populate it */ id instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size - options:storage_mode]; + options:MTLResourceStorageModeShared]; id motion_transforms_buf = nil; MTLPackedFloat4x3 *motion_transforms = nullptr; if (motion_blur && num_motion_transforms) { motion_transforms_buf = [mtl_device newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3) - options:storage_mode]; + options:MTLResourceStorageModeShared]; motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents; } @@ -1241,14 +1187,6 @@ bool BVHMetal::build_TLAS(Progress &progress, } } - if (storage_mode == MTLResourceStorageModeManaged) { - [instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)]; - if (motion_transforms_buf) { - [motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)]; - assert(num_motion_transforms == motion_transform_index); - } - } - MTLInstanceAccelerationStructureDescriptor *accelDesc = [MTLInstanceAccelerationStructureDescriptor descriptor]; accelDesc.instanceCount = num_instances; diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h index 759644e7afc..148e1897279 100644 --- a/intern/cycles/device/metal/device_impl.h +++ b/intern/cycles/device/metal/device_impl.h @@ -56,7 +56,6 @@ class MetalDevice : public Device { uint kernel_features = 0; bool using_nanovdb = false; - MTLResourceOptions default_storage_mode; int max_threads_per_threadgroup; int mtlDevId = 0; @@ -70,7 +69,6 @@ class MetalDevice : public Device { uint64_t offset = 0; uint64_t size = 0; void *hostPtr = nullptr; - bool use_UMA = false; /* If true, UMA memory in shared_pointer is being used. */ }; using MetalMemMap = map>; MetalMemMap metal_mem_map; diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index 1b2abea8617..c58eb7c4f71 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -86,16 +86,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile mtlDevice = usable_devices[mtlDevId]; metal_printf("Creating new Cycles Metal device: %s\n", info.description.c_str()); - /* determine default storage mode based on whether UMA is supported */ - - default_storage_mode = MTLResourceStorageModeManaged; - - /* We only support Apple Silicon which hasUnifiedMemory support. But leave this check here - * just in case a future GPU comes out that doesn't. */ - if ([mtlDevice hasUnifiedMemory]) { - default_storage_mode = MTLResourceStorageModeShared; - } - max_threads_per_threadgroup = 512; use_metalrt = info.use_hardware_raytracing; @@ -144,9 +134,11 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile arg_desc_buffer.access = MTLArgumentAccessReadOnly; mtlBufferArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_buffer ]]; - buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode]; - texture_bindings_2d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode]; - texture_bindings_3d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode]; + buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:MTLResourceStorageModeShared]; + texture_bindings_2d = [mtlDevice newBufferWithLength:8192 + options:MTLResourceStorageModeShared]; + texture_bindings_3d = [mtlDevice newBufferWithLength:8192 + options:MTLResourceStorageModeShared]; stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize); @@ -637,10 +629,6 @@ void MetalDevice::load_texture_info() [mtlTextureArgEncoder setTexture:nil atIndex:0]; } } - if (default_storage_mode == MTLResourceStorageModeManaged) { - [texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))]; - [texture_bindings_3d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))]; - } } } @@ -679,7 +667,7 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem) mem.device_pointer = 0; id metal_buffer = nil; - MTLResourceOptions options = default_storage_mode; + MTLResourceOptions options = MTLResourceStorageModeShared; if (size > 0) { if (mem.type == MEM_DEVICE_ONLY && !capture_enabled) { @@ -725,22 +713,15 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem) * pointer recalculation */ mem.device_pointer = device_ptr(mmem.get()); - if (metal_buffer.storageMode == MTLStorageModeShared) { - /* Replace host pointer with our host allocation. */ + /* Replace host pointer with our host allocation. */ + if (mem.host_pointer && mem.host_pointer != mmem->hostPtr) { + memcpy(mmem->hostPtr, mem.host_pointer, size); - if (mem.host_pointer && mem.host_pointer != mmem->hostPtr) { - memcpy(mmem->hostPtr, mem.host_pointer, size); - - host_free(mem.type, mem.host_pointer, mem.memory_size()); - mem.host_pointer = mmem->hostPtr; - } - mem.shared_pointer = mmem->hostPtr; - mem.shared_counter++; - mmem->use_UMA = true; - } - else { - mmem->use_UMA = false; + host_free(mem.type, mem.host_pointer, mem.memory_size()); + mem.host_pointer = mmem->hostPtr; } + mem.shared_pointer = mmem->hostPtr; + mem.shared_counter++; MetalMem *mmem_ptr = mmem.get(); metal_mem_map[&mem] = std::move(mmem); @@ -756,18 +737,7 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem) void MetalDevice::generic_copy_to(device_memory &mem) { - if (!mem.host_pointer || !mem.device_pointer) { - return; - } - - std::lock_guard lock(metal_mem_map_mutex); - if (!metal_mem_map.at(&mem)->use_UMA || mem.host_pointer != mem.shared_pointer) { - MetalMem &mmem = *metal_mem_map.at(&mem); - memcpy(mmem.hostPtr, mem.host_pointer, mem.memory_size()); - if (mmem.mtlBuffer.storageMode == MTLStorageModeManaged) { - [mmem.mtlBuffer didModifyRange:NSMakeRange(0, mem.memory_size())]; - } - } + /* No need to copy - Apple Silicon has Unified Memory Architecture. */ } void MetalDevice::generic_free(device_memory &mem) @@ -789,18 +759,13 @@ void MetalDevice::generic_free(device_memory &mem) bool free_mtlBuffer = false; - if (mmem.use_UMA) { - assert(mem.shared_pointer); - if (mem.shared_pointer) { - assert(mem.shared_counter > 0); - if (--mem.shared_counter == 0) { - free_mtlBuffer = true; - } + assert(mem.shared_pointer); + if (mem.shared_pointer) { + assert(mem.shared_counter > 0); + if (--mem.shared_counter == 0) { + free_mtlBuffer = true; } } - else { - free_mtlBuffer = true; - } if (free_mtlBuffer) { if (mem.host_pointer && mem.host_pointer == mem.shared_pointer) { @@ -810,7 +775,6 @@ void MetalDevice::generic_free(device_memory &mem) assert(!"Metal device should not copy memory back to host"); mem.host_pointer = mem.host_alloc(size); memcpy(mem.host_pointer, mem.shared_pointer, size); - mmem.use_UMA = false; } mem.shared_pointer = nullptr; @@ -872,36 +836,7 @@ void MetalDevice::mem_move_to_host(device_memory & /*mem*/) void MetalDevice::mem_copy_from( device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem) { - @autoreleasepool { - if (mem.host_pointer) { - - bool subcopy = (w >= 0 && h >= 0); - const size_t size = subcopy ? (elem * w * h) : mem.memory_size(); - const size_t offset = subcopy ? (elem * y * w) : 0; - - if (mem.device_pointer) { - std::lock_guard lock(metal_mem_map_mutex); - MetalMem &mmem = *metal_mem_map.at(&mem); - - if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) { - - id cmdBuffer = [mtlGeneralCommandQueue commandBuffer]; - id blitEncoder = [cmdBuffer blitCommandEncoder]; - [blitEncoder synchronizeResource:mmem.mtlBuffer]; - [blitEncoder endEncoding]; - [cmdBuffer commit]; - [cmdBuffer waitUntilCompleted]; - } - - if (mem.host_pointer != mmem.hostPtr) { - memcpy((uchar *)mem.host_pointer + offset, (uchar *)mmem.hostPtr + offset, size); - } - } - else { - memset((char *)mem.host_pointer + offset, 0, size); - } - } - } + /* No need to copy - Apple Silicon has Unified Memory Architecture. */ } void MetalDevice::mem_zero(device_memory &mem) @@ -909,17 +844,8 @@ void MetalDevice::mem_zero(device_memory &mem) if (!mem.device_pointer) { mem_alloc(mem); } - if (!mem.device_pointer) { - return; - } - - size_t size = mem.memory_size(); - std::lock_guard lock(metal_mem_map_mutex); - MetalMem &mmem = *metal_mem_map.at(&mem); - memset(mmem.hostPtr, 0, size); - if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) { - [mmem.mtlBuffer didModifyRange:NSMakeRange(0, size)]; - } + assert(mem.shared_pointer); + memset(mem.shared_pointer, 0, mem.memory_size()); } void MetalDevice::mem_free(device_memory &mem) @@ -1134,10 +1060,6 @@ void MetalDevice::tex_alloc(device_texture &mem) return; } } - MTLStorageMode storage_mode = MTLStorageModeManaged; - if ([mtlDevice hasUnifiedMemory]) { - storage_mode = MTLStorageModeShared; - } /* General variables for both architectures */ size_t size = mem.memory_size(); @@ -1212,7 +1134,7 @@ void MetalDevice::tex_alloc(device_texture &mem) height:mem.data_height mipmapped:NO]; - desc.storageMode = storage_mode; + desc.storageMode = MTLStorageModeShared; desc.usage = MTLTextureUsageShaderRead; desc.textureType = MTLTextureType3D; @@ -1248,7 +1170,7 @@ void MetalDevice::tex_alloc(device_texture &mem) height:mem.data_height mipmapped:NO]; - desc.storageMode = storage_mode; + desc.storageMode = MTLStorageModeShared; desc.usage = MTLTextureUsageShaderRead; VLOG_WORK << "Texture 2D allocate: " << mem.name << ", " @@ -1301,11 +1223,11 @@ void MetalDevice::tex_alloc(device_texture &mem) texture_bindings_3d.allocatedSize); } buffer_bindings_1d = [mtlDevice newBufferWithLength:min_buffer_length - options:default_storage_mode]; + options:MTLResourceStorageModeShared]; texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length - options:default_storage_mode]; + options:MTLResourceStorageModeShared]; texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length - options:default_storage_mode]; + options:MTLResourceStorageModeShared]; stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize + texture_bindings_3d.allocatedSize); @@ -1484,7 +1406,7 @@ void MetalDevice::update_bvh(BVHMetal *bvh_metal) // Allocate required buffers for BLAS array. uint64_t count = bvh_metal->blas_array.size(); uint64_t buffer_size = mtlBlasArgEncoder.encodedLength * count; - blas_buffer = [mtlDevice newBufferWithLength:buffer_size options:default_storage_mode]; + blas_buffer = [mtlDevice newBufferWithLength:buffer_size options:MTLResourceStorageModeShared]; stats.mem_alloc(blas_buffer.allocatedSize); for (uint64_t i = 0; i < count; ++i) { @@ -1493,9 +1415,6 @@ void MetalDevice::update_bvh(BVHMetal *bvh_metal) [mtlBlasArgEncoder setAccelerationStructure:bvh_metal->blas_array[i] atIndex:0]; } } - if (default_storage_mode == MTLResourceStorageModeManaged) { - [blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)]; - } } CCL_NAMESPACE_END diff --git a/intern/cycles/device/metal/queue.h b/intern/cycles/device/metal/queue.h index 98a586e6a0e..c069693b48a 100644 --- a/intern/cycles/device/metal/queue.h +++ b/intern/cycles/device/metal/queue.h @@ -71,13 +71,6 @@ class MetalDeviceQueue : public DeviceQueue { dispatch_queue_t event_queue_; dispatch_semaphore_t wait_semaphore_; - struct CopyBack { - void *host_pointer; - void *gpu_mem; - uint64_t size; - }; - std::vector copy_back_mem_; - uint64_t shared_event_id_; uint64_t command_buffers_submitted_ = 0; uint64_t command_buffers_completed_ = 0; diff --git a/intern/cycles/device/metal/queue.mm b/intern/cycles/device/metal/queue.mm index cf6cf6c8a82..f823f2a0bf7 100644 --- a/intern/cycles/device/metal/queue.mm +++ b/intern/cycles/device/metal/queue.mm @@ -398,17 +398,8 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, plain_old_launch_data_size); /* Allocate an argument buffer. */ - MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged; - if ([mtlDevice_ hasUnifiedMemory]) { - arg_buffer_options = MTLResourceStorageModeShared; - } - - id arg_buffer = temp_buffer_pool_.get_buffer(mtlDevice_, - mtlCommandBuffer_, - arg_buffer_length, - arg_buffer_options, - init_arg_buffer, - stats_); + id arg_buffer = temp_buffer_pool_.get_buffer( + mtlDevice_, mtlCommandBuffer_, arg_buffer_length, init_arg_buffer, stats_); /* Encode the pointer "enqueue" arguments */ bytes_written = 0; @@ -516,10 +507,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, bytes_written = metal_offsets + metal_device_->mtlAncillaryArgEncoder.encodedLength; } - if (arg_buffer.storageMode == MTLStorageModeManaged) { - [arg_buffer didModifyRange:NSMakeRange(0, bytes_written)]; - } - [mtlComputeCommandEncoder setBuffer:arg_buffer offset:0 atIndex:0]; [mtlComputeCommandEncoder setBuffer:arg_buffer offset:globals_offsets atIndex:1]; [mtlComputeCommandEncoder setBuffer:arg_buffer offset:metal_offsets atIndex:2]; @@ -624,15 +611,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel, for (auto &it : metal_device_->metal_mem_map) { const string c_integrator_queue_counter = "integrator_queue_counter"; if (it.first->name == c_integrator_queue_counter) { - /* Workaround "device_copy_from" being protected. */ - struct MyDeviceMemory : device_memory { - void device_copy_from__IntegratorQueueCounter() - { - device_copy_from(0, data_width, 1, sizeof(IntegratorQueueCounter)); - } - }; - ((MyDeviceMemory *)it.first)->device_copy_from__IntegratorQueueCounter(); - if (IntegratorQueueCounter *queue_counter = (IntegratorQueueCounter *) it.first->host_pointer) { @@ -701,11 +679,6 @@ bool MetalDeviceQueue::synchronize() [mtlCommandBuffer_ release]; - for (const CopyBack &mmem : copy_back_mem_) { - memcpy((uchar *)mmem.host_pointer, (uchar *)mmem.gpu_mem, mmem.size); - } - copy_back_mem_.clear(); - temp_buffer_pool_.process_command_buffer_completion(mtlCommandBuffer_); metal_device_->flush_delayed_free_list(); @@ -768,79 +741,13 @@ void MetalDeviceQueue::copy_to_device(device_memory &mem) assert(mem.device_pointer != 0); assert(mem.host_pointer != nullptr); - - std::lock_guard lock(metal_device_->metal_mem_map_mutex); - auto result = metal_device_->metal_mem_map.find(&mem); - if (result != metal_device_->metal_mem_map.end()) { - if (mem.host_pointer == mem.shared_pointer) { - return; - } - - MetalDevice::MetalMem &mmem = *result->second; - id blitEncoder = get_blit_encoder(); - - id buffer = temp_buffer_pool_.get_buffer(mtlDevice_, - mtlCommandBuffer_, - mmem.size, - MTLResourceStorageModeShared, - mem.host_pointer, - stats_); - - [blitEncoder copyFromBuffer:buffer - sourceOffset:0 - toBuffer:mmem.mtlBuffer - destinationOffset:mmem.offset - size:mmem.size]; - } - else { - metal_device_->mem_copy_to(mem); - } + /* No need to copy - Apple Silicon has Unified Memory Architecture. */ } } void MetalDeviceQueue::copy_from_device(device_memory &mem) { - @autoreleasepool { - if (metal_device_->have_error()) { - return; - } - - assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE); - - if (mem.memory_size() == 0) { - return; - } - - assert(mem.device_pointer != 0); - assert(mem.host_pointer != nullptr); - - std::lock_guard lock(metal_device_->metal_mem_map_mutex); - MetalDevice::MetalMem &mmem = *metal_device_->metal_mem_map.at(&mem); - if (mmem.mtlBuffer) { - const size_t size = mem.memory_size(); - - if (mem.device_pointer) { - if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) { - id blitEncoder = get_blit_encoder(); - [blitEncoder synchronizeResource:mmem.mtlBuffer]; - } - if (mem.host_pointer != mmem.hostPtr) { - if (mtlCommandBuffer_) { - copy_back_mem_.push_back({mem.host_pointer, mmem.hostPtr, size}); - } - else { - memcpy((uchar *)mem.host_pointer, (uchar *)mmem.hostPtr, size); - } - } - } - else { - memset((char *)mem.host_pointer, 0, size); - } - } - else { - metal_device_->mem_copy_from(mem); - } - } + /* No need to copy - Apple Silicon has Unified Memory Architecture. */ } void MetalDeviceQueue::prepare_resources(DeviceKernel /*kernel*/) diff --git a/intern/cycles/device/metal/util.h b/intern/cycles/device/metal/util.h index 889c376f91e..263ac7a6bc4 100644 --- a/intern/cycles/device/metal/util.h +++ b/intern/cycles/device/metal/util.h @@ -54,7 +54,6 @@ class MetalBufferPool { id get_buffer(id device, id command_buffer, NSUInteger length, - MTLResourceOptions options, const void *pointer, Stats &stats); void process_command_buffer_completion(id command_buffer); diff --git a/intern/cycles/device/metal/util.mm b/intern/cycles/device/metal/util.mm index 88f48cd0f50..34f7ea9f92d 100644 --- a/intern/cycles/device/metal/util.mm +++ b/intern/cycles/device/metal/util.mm @@ -95,7 +95,9 @@ const vector> &MetalInfo::get_usable_devices() strstr(device_name_char, "Apple")) { /* TODO: Implement a better way to identify device vendor instead of relying on name. */ - usable = true; + /* We only support Apple Silicon GPUs which all have unified memory, but explicitly check + * just in case it ever changes. */ + usable = [device hasUnifiedMemory]; } } @@ -119,24 +121,15 @@ const vector> &MetalInfo::get_usable_devices() id MetalBufferPool::get_buffer(id device, id command_buffer, NSUInteger length, - MTLResourceOptions options, const void *pointer, Stats &stats) { id buffer = nil; - - MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >> - MTLResourceStorageModeShift); - MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >> - MTLResourceCPUCacheModeShift); - { thread_scoped_lock lock(buffer_mutex); /* Find an unused buffer with matching size and storage mode. */ for (MetalBufferListEntry &bufferEntry : temp_buffers) { - if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode && - cpuCacheMode == bufferEntry.buffer.cpuCacheMode && bufferEntry.command_buffer == nil) - { + if (bufferEntry.buffer.length == length && bufferEntry.command_buffer == nil) { buffer = bufferEntry.buffer; bufferEntry.command_buffer = command_buffer; break; @@ -145,7 +138,7 @@ id MetalBufferPool::get_buffer(id device, if (!buffer) { /* Create a new buffer and add it to the pool. Typically this pool will only grow to a * handful of entries. */ - buffer = [device newBufferWithLength:length options:options]; + buffer = [device newBufferWithLength:length options:MTLResourceStorageModeShared]; stats.mem_alloc(buffer.allocatedSize); total_temp_mem_size += buffer.allocatedSize; temp_buffers.push_back(MetalBufferListEntry{buffer, command_buffer}); @@ -155,9 +148,6 @@ id MetalBufferPool::get_buffer(id device, /* Copy over data */ if (pointer) { memcpy(buffer.contents, pointer, length); - if (buffer.storageMode == MTLStorageModeManaged) { - [buffer didModifyRange:NSMakeRange(0, length)]; - } } return buffer;