Cycles: Apple Silicon tidy: Remove non-UMA codepaths
This PR removes dead code left over after #123551 (removal of AMD and Intel GPU support). Since only Apple Silicon GPUs are now supported, it is safe to assume that Unified Memory Architecture (UMA) is available, so the code paths that handled explicit copying between CPU and GPU memory are now just clutter. Pull Request: https://projects.blender.org/blender/blender/pulls/136117
This commit is contained in:
committed by
Michael Jones (Apple)
parent
7bf17d83ab
commit
1a93dfe4fc
@@ -196,29 +196,21 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
|
|||||||
num_motion_steps = mesh->get_motion_steps();
|
num_motion_steps = mesh->get_motion_steps();
|
||||||
}
|
}
|
||||||
|
|
||||||
MTLResourceOptions storage_mode;
|
|
||||||
if (mtl_device.hasUnifiedMemory) {
|
|
||||||
storage_mode = MTLResourceStorageModeShared;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
storage_mode = MTLResourceStorageModeManaged;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Upload the mesh data to the GPU */
|
/* Upload the mesh data to the GPU */
|
||||||
id<MTLBuffer> posBuf = nil;
|
id<MTLBuffer> posBuf = nil;
|
||||||
id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
|
id<MTLBuffer> indexBuf = [mtl_device newBufferWithBytes:tris.data()
|
||||||
length:num_indices * sizeof(tris.data()[0])
|
length:num_indices * sizeof(tris.data()[0])
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
|
||||||
if (num_motion_steps == 1) {
|
if (num_motion_steps == 1) {
|
||||||
posBuf = [mtl_device newBufferWithBytes:verts.data()
|
posBuf = [mtl_device newBufferWithBytes:verts.data()
|
||||||
length:num_verts * sizeof(verts.data()[0])
|
length:num_verts * sizeof(verts.data()[0])
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
posBuf = [mtl_device
|
posBuf = [mtl_device
|
||||||
newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
|
newBufferWithLength:num_verts * num_motion_steps * sizeof(verts.data()[0])
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
float3 *dest_data = (float3 *)[posBuf contents];
|
float3 *dest_data = (float3 *)[posBuf contents];
|
||||||
size_t center_step = (num_motion_steps - 1) / 2;
|
size_t center_step = (num_motion_steps - 1) / 2;
|
||||||
for (size_t step = 0; step < num_motion_steps; ++step) {
|
for (size_t step = 0; step < num_motion_steps; ++step) {
|
||||||
@@ -230,9 +222,6 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
|
|||||||
}
|
}
|
||||||
std::copy_n(verts, num_verts, dest_data + num_verts * step);
|
std::copy_n(verts, num_verts, dest_data + num_verts * step);
|
||||||
}
|
}
|
||||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
|
||||||
[posBuf didModifyRange:NSMakeRange(0, posBuf.length)];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create an acceleration structure. */
|
/* Create an acceleration structure. */
|
||||||
@@ -412,14 +401,6 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
|
|||||||
num_motion_steps = hair->get_motion_steps();
|
num_motion_steps = hair->get_motion_steps();
|
||||||
}
|
}
|
||||||
|
|
||||||
MTLResourceOptions storage_mode;
|
|
||||||
if (mtl_device.hasUnifiedMemory) {
|
|
||||||
storage_mode = MTLResourceStorageModeShared;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
storage_mode = MTLResourceStorageModeManaged;
|
|
||||||
}
|
|
||||||
|
|
||||||
id<MTLBuffer> cpBuffer = nil;
|
id<MTLBuffer> cpBuffer = nil;
|
||||||
id<MTLBuffer> radiusBuffer = nil;
|
id<MTLBuffer> radiusBuffer = nil;
|
||||||
id<MTLBuffer> idxBuffer = nil;
|
id<MTLBuffer> idxBuffer = nil;
|
||||||
@@ -478,15 +459,15 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
|
|||||||
/* Allocate and populate MTLBuffers for geometry. */
|
/* Allocate and populate MTLBuffers for geometry. */
|
||||||
idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
|
idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
|
||||||
length:idxData.size() * sizeof(int)
|
length:idxData.size() * sizeof(int)
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
|
||||||
cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
|
cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
|
||||||
length:cpData.size() * sizeof(float3)
|
length:cpData.size() * sizeof(float3)
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
|
||||||
radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
|
radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
|
||||||
length:radiusData.size() * sizeof(float)
|
length:radiusData.size() * sizeof(float)
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
|
||||||
std::vector<MTLMotionKeyframeData *> cp_ptrs;
|
std::vector<MTLMotionKeyframeData *> cp_ptrs;
|
||||||
std::vector<MTLMotionKeyframeData *> radius_ptrs;
|
std::vector<MTLMotionKeyframeData *> radius_ptrs;
|
||||||
@@ -505,12 +486,6 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
|
|||||||
radius_ptrs.push_back(k);
|
radius_ptrs.push_back(k);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
|
||||||
[cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
|
|
||||||
[idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
|
|
||||||
[radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
|
|
||||||
}
|
|
||||||
|
|
||||||
geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
|
geomDescCrv.controlPointBuffers = [NSArray arrayWithObjects:cp_ptrs.data()
|
||||||
count:cp_ptrs.size()];
|
count:cp_ptrs.size()];
|
||||||
geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
|
geomDescCrv.radiusBuffers = [NSArray arrayWithObjects:radius_ptrs.data()
|
||||||
@@ -574,21 +549,16 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
|
|||||||
/* Allocate and populate MTLBuffers for geometry. */
|
/* Allocate and populate MTLBuffers for geometry. */
|
||||||
idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
|
idxBuffer = [mtl_device newBufferWithBytes:idxData.data()
|
||||||
length:idxData.size() * sizeof(int)
|
length:idxData.size() * sizeof(int)
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
|
||||||
cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
|
cpBuffer = [mtl_device newBufferWithBytes:cpData.data()
|
||||||
length:cpData.size() * sizeof(float3)
|
length:cpData.size() * sizeof(float3)
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
|
||||||
radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
|
radiusBuffer = [mtl_device newBufferWithBytes:radiusData.data()
|
||||||
length:radiusData.size() * sizeof(float)
|
length:radiusData.size() * sizeof(float)
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
|
||||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
|
||||||
[cpBuffer didModifyRange:NSMakeRange(0, cpBuffer.length)];
|
|
||||||
[idxBuffer didModifyRange:NSMakeRange(0, idxBuffer.length)];
|
|
||||||
[radiusBuffer didModifyRange:NSMakeRange(0, radiusBuffer.length)];
|
|
||||||
}
|
|
||||||
geomDescCrv.controlPointBuffer = cpBuffer;
|
geomDescCrv.controlPointBuffer = cpBuffer;
|
||||||
geomDescCrv.radiusBuffer = radiusBuffer;
|
geomDescCrv.radiusBuffer = radiusBuffer;
|
||||||
geomDescCrv.controlPointCount = cpData.size();
|
geomDescCrv.controlPointCount = cpData.size();
|
||||||
@@ -758,18 +728,10 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
|
|||||||
|
|
||||||
const size_t num_aabbs = num_motion_steps * num_points;
|
const size_t num_aabbs = num_motion_steps * num_points;
|
||||||
|
|
||||||
MTLResourceOptions storage_mode;
|
|
||||||
if (mtl_device.hasUnifiedMemory) {
|
|
||||||
storage_mode = MTLResourceStorageModeShared;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
storage_mode = MTLResourceStorageModeManaged;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Allocate a GPU buffer for the AABB data and populate it */
|
/* Allocate a GPU buffer for the AABB data and populate it */
|
||||||
id<MTLBuffer> aabbBuf = [mtl_device
|
id<MTLBuffer> aabbBuf = [mtl_device
|
||||||
newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
|
newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
|
MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
|
||||||
|
|
||||||
/* Get AABBs for each motion step */
|
/* Get AABBs for each motion step */
|
||||||
@@ -803,10 +765,6 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
|
||||||
[aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
|
|
||||||
}
|
|
||||||
|
|
||||||
MTLAccelerationStructureGeometryDescriptor *geomDesc;
|
MTLAccelerationStructureGeometryDescriptor *geomDesc;
|
||||||
if (motion_blur) {
|
if (motion_blur) {
|
||||||
std::vector<MTLMotionKeyframeData *> aabb_ptrs;
|
std::vector<MTLMotionKeyframeData *> aabb_ptrs;
|
||||||
@@ -987,12 +945,8 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
|||||||
/* Defined inside available check, for return type to be available. */
|
/* Defined inside available check, for return type to be available. */
|
||||||
auto make_null_BLAS = [](id<MTLDevice> mtl_device,
|
auto make_null_BLAS = [](id<MTLDevice> mtl_device,
|
||||||
id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
|
id<MTLCommandQueue> queue) -> id<MTLAccelerationStructure> {
|
||||||
MTLResourceOptions storage_mode = MTLResourceStorageModeManaged;
|
id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3)
|
||||||
if (mtl_device.hasUnifiedMemory) {
|
options:MTLResourceStorageModeShared];
|
||||||
storage_mode = MTLResourceStorageModeShared;
|
|
||||||
}
|
|
||||||
|
|
||||||
id<MTLBuffer> nullBuf = [mtl_device newBufferWithLength:sizeof(float3) options:storage_mode];
|
|
||||||
|
|
||||||
/* Create an acceleration structure. */
|
/* Create an acceleration structure. */
|
||||||
MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
|
MTLAccelerationStructureTriangleGeometryDescriptor *geomDesc =
|
||||||
@@ -1080,14 +1034,6 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
|||||||
return blas_index;
|
return blas_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
MTLResourceOptions storage_mode;
|
|
||||||
if (mtl_device.hasUnifiedMemory) {
|
|
||||||
storage_mode = MTLResourceStorageModeShared;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
storage_mode = MTLResourceStorageModeManaged;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t instance_size;
|
size_t instance_size;
|
||||||
if (motion_blur) {
|
if (motion_blur) {
|
||||||
instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
|
instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor);
|
||||||
@@ -1098,13 +1044,13 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
|||||||
|
|
||||||
/* Allocate a GPU buffer for the instance data and populate it */
|
/* Allocate a GPU buffer for the instance data and populate it */
|
||||||
id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
|
id<MTLBuffer> instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
id<MTLBuffer> motion_transforms_buf = nil;
|
id<MTLBuffer> motion_transforms_buf = nil;
|
||||||
MTLPackedFloat4x3 *motion_transforms = nullptr;
|
MTLPackedFloat4x3 *motion_transforms = nullptr;
|
||||||
if (motion_blur && num_motion_transforms) {
|
if (motion_blur && num_motion_transforms) {
|
||||||
motion_transforms_buf = [mtl_device
|
motion_transforms_buf = [mtl_device
|
||||||
newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
|
newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3)
|
||||||
options:storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
|
motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1241,14 +1187,6 @@ bool BVHMetal::build_TLAS(Progress &progress,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (storage_mode == MTLResourceStorageModeManaged) {
|
|
||||||
[instanceBuf didModifyRange:NSMakeRange(0, instanceBuf.length)];
|
|
||||||
if (motion_transforms_buf) {
|
|
||||||
[motion_transforms_buf didModifyRange:NSMakeRange(0, motion_transforms_buf.length)];
|
|
||||||
assert(num_motion_transforms == motion_transform_index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MTLInstanceAccelerationStructureDescriptor *accelDesc =
|
MTLInstanceAccelerationStructureDescriptor *accelDesc =
|
||||||
[MTLInstanceAccelerationStructureDescriptor descriptor];
|
[MTLInstanceAccelerationStructureDescriptor descriptor];
|
||||||
accelDesc.instanceCount = num_instances;
|
accelDesc.instanceCount = num_instances;
|
||||||
|
|||||||
@@ -56,7 +56,6 @@ class MetalDevice : public Device {
|
|||||||
|
|
||||||
uint kernel_features = 0;
|
uint kernel_features = 0;
|
||||||
bool using_nanovdb = false;
|
bool using_nanovdb = false;
|
||||||
MTLResourceOptions default_storage_mode;
|
|
||||||
int max_threads_per_threadgroup;
|
int max_threads_per_threadgroup;
|
||||||
|
|
||||||
int mtlDevId = 0;
|
int mtlDevId = 0;
|
||||||
@@ -70,7 +69,6 @@ class MetalDevice : public Device {
|
|||||||
uint64_t offset = 0;
|
uint64_t offset = 0;
|
||||||
uint64_t size = 0;
|
uint64_t size = 0;
|
||||||
void *hostPtr = nullptr;
|
void *hostPtr = nullptr;
|
||||||
bool use_UMA = false; /* If true, UMA memory in shared_pointer is being used. */
|
|
||||||
};
|
};
|
||||||
using MetalMemMap = map<device_memory *, unique_ptr<MetalMem>>;
|
using MetalMemMap = map<device_memory *, unique_ptr<MetalMem>>;
|
||||||
MetalMemMap metal_mem_map;
|
MetalMemMap metal_mem_map;
|
||||||
|
|||||||
@@ -86,16 +86,6 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
|||||||
mtlDevice = usable_devices[mtlDevId];
|
mtlDevice = usable_devices[mtlDevId];
|
||||||
metal_printf("Creating new Cycles Metal device: %s\n", info.description.c_str());
|
metal_printf("Creating new Cycles Metal device: %s\n", info.description.c_str());
|
||||||
|
|
||||||
/* determine default storage mode based on whether UMA is supported */
|
|
||||||
|
|
||||||
default_storage_mode = MTLResourceStorageModeManaged;
|
|
||||||
|
|
||||||
/* We only support Apple Silicon which hasUnifiedMemory support. But leave this check here
|
|
||||||
* just in case a future GPU comes out that doesn't. */
|
|
||||||
if ([mtlDevice hasUnifiedMemory]) {
|
|
||||||
default_storage_mode = MTLResourceStorageModeShared;
|
|
||||||
}
|
|
||||||
|
|
||||||
max_threads_per_threadgroup = 512;
|
max_threads_per_threadgroup = 512;
|
||||||
|
|
||||||
use_metalrt = info.use_hardware_raytracing;
|
use_metalrt = info.use_hardware_raytracing;
|
||||||
@@ -144,9 +134,11 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile
|
|||||||
arg_desc_buffer.access = MTLArgumentAccessReadOnly;
|
arg_desc_buffer.access = MTLArgumentAccessReadOnly;
|
||||||
mtlBufferArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_buffer ]];
|
mtlBufferArgEncoder = [mtlDevice newArgumentEncoderWithArguments:@[ arg_desc_buffer ]];
|
||||||
|
|
||||||
buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
|
buffer_bindings_1d = [mtlDevice newBufferWithLength:8192 options:MTLResourceStorageModeShared];
|
||||||
texture_bindings_2d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
|
texture_bindings_2d = [mtlDevice newBufferWithLength:8192
|
||||||
texture_bindings_3d = [mtlDevice newBufferWithLength:8192 options:default_storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
texture_bindings_3d = [mtlDevice newBufferWithLength:8192
|
||||||
|
options:MTLResourceStorageModeShared];
|
||||||
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||||
texture_bindings_3d.allocatedSize);
|
texture_bindings_3d.allocatedSize);
|
||||||
|
|
||||||
@@ -637,10 +629,6 @@ void MetalDevice::load_texture_info()
|
|||||||
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
[mtlTextureArgEncoder setTexture:nil atIndex:0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
|
||||||
[texture_bindings_2d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))];
|
|
||||||
[texture_bindings_3d didModifyRange:NSMakeRange(0, num_textures * sizeof(void *))];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -679,7 +667,7 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
|
|||||||
mem.device_pointer = 0;
|
mem.device_pointer = 0;
|
||||||
|
|
||||||
id<MTLBuffer> metal_buffer = nil;
|
id<MTLBuffer> metal_buffer = nil;
|
||||||
MTLResourceOptions options = default_storage_mode;
|
MTLResourceOptions options = MTLResourceStorageModeShared;
|
||||||
|
|
||||||
if (size > 0) {
|
if (size > 0) {
|
||||||
if (mem.type == MEM_DEVICE_ONLY && !capture_enabled) {
|
if (mem.type == MEM_DEVICE_ONLY && !capture_enabled) {
|
||||||
@@ -725,22 +713,15 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
|
|||||||
* pointer recalculation */
|
* pointer recalculation */
|
||||||
mem.device_pointer = device_ptr(mmem.get());
|
mem.device_pointer = device_ptr(mmem.get());
|
||||||
|
|
||||||
if (metal_buffer.storageMode == MTLStorageModeShared) {
|
/* Replace host pointer with our host allocation. */
|
||||||
/* Replace host pointer with our host allocation. */
|
if (mem.host_pointer && mem.host_pointer != mmem->hostPtr) {
|
||||||
|
memcpy(mmem->hostPtr, mem.host_pointer, size);
|
||||||
|
|
||||||
if (mem.host_pointer && mem.host_pointer != mmem->hostPtr) {
|
host_free(mem.type, mem.host_pointer, mem.memory_size());
|
||||||
memcpy(mmem->hostPtr, mem.host_pointer, size);
|
mem.host_pointer = mmem->hostPtr;
|
||||||
|
|
||||||
host_free(mem.type, mem.host_pointer, mem.memory_size());
|
|
||||||
mem.host_pointer = mmem->hostPtr;
|
|
||||||
}
|
|
||||||
mem.shared_pointer = mmem->hostPtr;
|
|
||||||
mem.shared_counter++;
|
|
||||||
mmem->use_UMA = true;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
mmem->use_UMA = false;
|
|
||||||
}
|
}
|
||||||
|
mem.shared_pointer = mmem->hostPtr;
|
||||||
|
mem.shared_counter++;
|
||||||
|
|
||||||
MetalMem *mmem_ptr = mmem.get();
|
MetalMem *mmem_ptr = mmem.get();
|
||||||
metal_mem_map[&mem] = std::move(mmem);
|
metal_mem_map[&mem] = std::move(mmem);
|
||||||
@@ -756,18 +737,7 @@ MetalDevice::MetalMem *MetalDevice::generic_alloc(device_memory &mem)
|
|||||||
|
|
||||||
void MetalDevice::generic_copy_to(device_memory &mem)
|
void MetalDevice::generic_copy_to(device_memory &mem)
|
||||||
{
|
{
|
||||||
if (!mem.host_pointer || !mem.device_pointer) {
|
/* No need to copy - Apple Silicon has Unified Memory Architecture. */
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
|
||||||
if (!metal_mem_map.at(&mem)->use_UMA || mem.host_pointer != mem.shared_pointer) {
|
|
||||||
MetalMem &mmem = *metal_mem_map.at(&mem);
|
|
||||||
memcpy(mmem.hostPtr, mem.host_pointer, mem.memory_size());
|
|
||||||
if (mmem.mtlBuffer.storageMode == MTLStorageModeManaged) {
|
|
||||||
[mmem.mtlBuffer didModifyRange:NSMakeRange(0, mem.memory_size())];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDevice::generic_free(device_memory &mem)
|
void MetalDevice::generic_free(device_memory &mem)
|
||||||
@@ -789,18 +759,13 @@ void MetalDevice::generic_free(device_memory &mem)
|
|||||||
|
|
||||||
bool free_mtlBuffer = false;
|
bool free_mtlBuffer = false;
|
||||||
|
|
||||||
if (mmem.use_UMA) {
|
assert(mem.shared_pointer);
|
||||||
assert(mem.shared_pointer);
|
if (mem.shared_pointer) {
|
||||||
if (mem.shared_pointer) {
|
assert(mem.shared_counter > 0);
|
||||||
assert(mem.shared_counter > 0);
|
if (--mem.shared_counter == 0) {
|
||||||
if (--mem.shared_counter == 0) {
|
free_mtlBuffer = true;
|
||||||
free_mtlBuffer = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
free_mtlBuffer = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (free_mtlBuffer) {
|
if (free_mtlBuffer) {
|
||||||
if (mem.host_pointer && mem.host_pointer == mem.shared_pointer) {
|
if (mem.host_pointer && mem.host_pointer == mem.shared_pointer) {
|
||||||
@@ -810,7 +775,6 @@ void MetalDevice::generic_free(device_memory &mem)
|
|||||||
assert(!"Metal device should not copy memory back to host");
|
assert(!"Metal device should not copy memory back to host");
|
||||||
mem.host_pointer = mem.host_alloc(size);
|
mem.host_pointer = mem.host_alloc(size);
|
||||||
memcpy(mem.host_pointer, mem.shared_pointer, size);
|
memcpy(mem.host_pointer, mem.shared_pointer, size);
|
||||||
mmem.use_UMA = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
mem.shared_pointer = nullptr;
|
mem.shared_pointer = nullptr;
|
||||||
@@ -872,36 +836,7 @@ void MetalDevice::mem_move_to_host(device_memory & /*mem*/)
|
|||||||
void MetalDevice::mem_copy_from(
|
void MetalDevice::mem_copy_from(
|
||||||
device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem)
|
device_memory &mem, const size_t y, size_t w, const size_t h, size_t elem)
|
||||||
{
|
{
|
||||||
@autoreleasepool {
|
/* No need to copy - Apple Silicon has Unified Memory Architecture. */
|
||||||
if (mem.host_pointer) {
|
|
||||||
|
|
||||||
bool subcopy = (w >= 0 && h >= 0);
|
|
||||||
const size_t size = subcopy ? (elem * w * h) : mem.memory_size();
|
|
||||||
const size_t offset = subcopy ? (elem * y * w) : 0;
|
|
||||||
|
|
||||||
if (mem.device_pointer) {
|
|
||||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
|
||||||
MetalMem &mmem = *metal_mem_map.at(&mem);
|
|
||||||
|
|
||||||
if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) {
|
|
||||||
|
|
||||||
id<MTLCommandBuffer> cmdBuffer = [mtlGeneralCommandQueue commandBuffer];
|
|
||||||
id<MTLBlitCommandEncoder> blitEncoder = [cmdBuffer blitCommandEncoder];
|
|
||||||
[blitEncoder synchronizeResource:mmem.mtlBuffer];
|
|
||||||
[blitEncoder endEncoding];
|
|
||||||
[cmdBuffer commit];
|
|
||||||
[cmdBuffer waitUntilCompleted];
|
|
||||||
}
|
|
||||||
|
|
||||||
if (mem.host_pointer != mmem.hostPtr) {
|
|
||||||
memcpy((uchar *)mem.host_pointer + offset, (uchar *)mmem.hostPtr + offset, size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
memset((char *)mem.host_pointer + offset, 0, size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDevice::mem_zero(device_memory &mem)
|
void MetalDevice::mem_zero(device_memory &mem)
|
||||||
@@ -909,17 +844,8 @@ void MetalDevice::mem_zero(device_memory &mem)
|
|||||||
if (!mem.device_pointer) {
|
if (!mem.device_pointer) {
|
||||||
mem_alloc(mem);
|
mem_alloc(mem);
|
||||||
}
|
}
|
||||||
if (!mem.device_pointer) {
|
assert(mem.shared_pointer);
|
||||||
return;
|
memset(mem.shared_pointer, 0, mem.memory_size());
|
||||||
}
|
|
||||||
|
|
||||||
size_t size = mem.memory_size();
|
|
||||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
|
||||||
MetalMem &mmem = *metal_mem_map.at(&mem);
|
|
||||||
memset(mmem.hostPtr, 0, size);
|
|
||||||
if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) {
|
|
||||||
[mmem.mtlBuffer didModifyRange:NSMakeRange(0, size)];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDevice::mem_free(device_memory &mem)
|
void MetalDevice::mem_free(device_memory &mem)
|
||||||
@@ -1134,10 +1060,6 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
MTLStorageMode storage_mode = MTLStorageModeManaged;
|
|
||||||
if ([mtlDevice hasUnifiedMemory]) {
|
|
||||||
storage_mode = MTLStorageModeShared;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* General variables for both architectures */
|
/* General variables for both architectures */
|
||||||
size_t size = mem.memory_size();
|
size_t size = mem.memory_size();
|
||||||
@@ -1212,7 +1134,7 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||||||
height:mem.data_height
|
height:mem.data_height
|
||||||
mipmapped:NO];
|
mipmapped:NO];
|
||||||
|
|
||||||
desc.storageMode = storage_mode;
|
desc.storageMode = MTLStorageModeShared;
|
||||||
desc.usage = MTLTextureUsageShaderRead;
|
desc.usage = MTLTextureUsageShaderRead;
|
||||||
|
|
||||||
desc.textureType = MTLTextureType3D;
|
desc.textureType = MTLTextureType3D;
|
||||||
@@ -1248,7 +1170,7 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||||||
height:mem.data_height
|
height:mem.data_height
|
||||||
mipmapped:NO];
|
mipmapped:NO];
|
||||||
|
|
||||||
desc.storageMode = storage_mode;
|
desc.storageMode = MTLStorageModeShared;
|
||||||
desc.usage = MTLTextureUsageShaderRead;
|
desc.usage = MTLTextureUsageShaderRead;
|
||||||
|
|
||||||
VLOG_WORK << "Texture 2D allocate: " << mem.name << ", "
|
VLOG_WORK << "Texture 2D allocate: " << mem.name << ", "
|
||||||
@@ -1301,11 +1223,11 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
|||||||
texture_bindings_3d.allocatedSize);
|
texture_bindings_3d.allocatedSize);
|
||||||
}
|
}
|
||||||
buffer_bindings_1d = [mtlDevice newBufferWithLength:min_buffer_length
|
buffer_bindings_1d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||||
options:default_storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length
|
texture_bindings_2d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||||
options:default_storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length
|
texture_bindings_3d = [mtlDevice newBufferWithLength:min_buffer_length
|
||||||
options:default_storage_mode];
|
options:MTLResourceStorageModeShared];
|
||||||
|
|
||||||
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
stats.mem_alloc(buffer_bindings_1d.allocatedSize + texture_bindings_2d.allocatedSize +
|
||||||
texture_bindings_3d.allocatedSize);
|
texture_bindings_3d.allocatedSize);
|
||||||
@@ -1484,7 +1406,7 @@ void MetalDevice::update_bvh(BVHMetal *bvh_metal)
|
|||||||
// Allocate required buffers for BLAS array.
|
// Allocate required buffers for BLAS array.
|
||||||
uint64_t count = bvh_metal->blas_array.size();
|
uint64_t count = bvh_metal->blas_array.size();
|
||||||
uint64_t buffer_size = mtlBlasArgEncoder.encodedLength * count;
|
uint64_t buffer_size = mtlBlasArgEncoder.encodedLength * count;
|
||||||
blas_buffer = [mtlDevice newBufferWithLength:buffer_size options:default_storage_mode];
|
blas_buffer = [mtlDevice newBufferWithLength:buffer_size options:MTLResourceStorageModeShared];
|
||||||
stats.mem_alloc(blas_buffer.allocatedSize);
|
stats.mem_alloc(blas_buffer.allocatedSize);
|
||||||
|
|
||||||
for (uint64_t i = 0; i < count; ++i) {
|
for (uint64_t i = 0; i < count; ++i) {
|
||||||
@@ -1493,9 +1415,6 @@ void MetalDevice::update_bvh(BVHMetal *bvh_metal)
|
|||||||
[mtlBlasArgEncoder setAccelerationStructure:bvh_metal->blas_array[i] atIndex:0];
|
[mtlBlasArgEncoder setAccelerationStructure:bvh_metal->blas_array[i] atIndex:0];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (default_storage_mode == MTLResourceStorageModeManaged) {
|
|
||||||
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
CCL_NAMESPACE_END
|
||||||
|
|||||||
@@ -71,13 +71,6 @@ class MetalDeviceQueue : public DeviceQueue {
|
|||||||
dispatch_queue_t event_queue_;
|
dispatch_queue_t event_queue_;
|
||||||
dispatch_semaphore_t wait_semaphore_;
|
dispatch_semaphore_t wait_semaphore_;
|
||||||
|
|
||||||
struct CopyBack {
|
|
||||||
void *host_pointer;
|
|
||||||
void *gpu_mem;
|
|
||||||
uint64_t size;
|
|
||||||
};
|
|
||||||
std::vector<CopyBack> copy_back_mem_;
|
|
||||||
|
|
||||||
uint64_t shared_event_id_;
|
uint64_t shared_event_id_;
|
||||||
uint64_t command_buffers_submitted_ = 0;
|
uint64_t command_buffers_submitted_ = 0;
|
||||||
uint64_t command_buffers_completed_ = 0;
|
uint64_t command_buffers_completed_ = 0;
|
||||||
|
|||||||
@@ -398,17 +398,8 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
plain_old_launch_data_size);
|
plain_old_launch_data_size);
|
||||||
|
|
||||||
/* Allocate an argument buffer. */
|
/* Allocate an argument buffer. */
|
||||||
MTLResourceOptions arg_buffer_options = MTLResourceStorageModeManaged;
|
id<MTLBuffer> arg_buffer = temp_buffer_pool_.get_buffer(
|
||||||
if ([mtlDevice_ hasUnifiedMemory]) {
|
mtlDevice_, mtlCommandBuffer_, arg_buffer_length, init_arg_buffer, stats_);
|
||||||
arg_buffer_options = MTLResourceStorageModeShared;
|
|
||||||
}
|
|
||||||
|
|
||||||
id<MTLBuffer> arg_buffer = temp_buffer_pool_.get_buffer(mtlDevice_,
|
|
||||||
mtlCommandBuffer_,
|
|
||||||
arg_buffer_length,
|
|
||||||
arg_buffer_options,
|
|
||||||
init_arg_buffer,
|
|
||||||
stats_);
|
|
||||||
|
|
||||||
/* Encode the pointer "enqueue" arguments */
|
/* Encode the pointer "enqueue" arguments */
|
||||||
bytes_written = 0;
|
bytes_written = 0;
|
||||||
@@ -516,10 +507,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
bytes_written = metal_offsets + metal_device_->mtlAncillaryArgEncoder.encodedLength;
|
bytes_written = metal_offsets + metal_device_->mtlAncillaryArgEncoder.encodedLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (arg_buffer.storageMode == MTLStorageModeManaged) {
|
|
||||||
[arg_buffer didModifyRange:NSMakeRange(0, bytes_written)];
|
|
||||||
}
|
|
||||||
|
|
||||||
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:0 atIndex:0];
|
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:0 atIndex:0];
|
||||||
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:globals_offsets atIndex:1];
|
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:globals_offsets atIndex:1];
|
||||||
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:metal_offsets atIndex:2];
|
[mtlComputeCommandEncoder setBuffer:arg_buffer offset:metal_offsets atIndex:2];
|
||||||
@@ -624,15 +611,6 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
|
|||||||
for (auto &it : metal_device_->metal_mem_map) {
|
for (auto &it : metal_device_->metal_mem_map) {
|
||||||
const string c_integrator_queue_counter = "integrator_queue_counter";
|
const string c_integrator_queue_counter = "integrator_queue_counter";
|
||||||
if (it.first->name == c_integrator_queue_counter) {
|
if (it.first->name == c_integrator_queue_counter) {
|
||||||
/* Workaround "device_copy_from" being protected. */
|
|
||||||
struct MyDeviceMemory : device_memory {
|
|
||||||
void device_copy_from__IntegratorQueueCounter()
|
|
||||||
{
|
|
||||||
device_copy_from(0, data_width, 1, sizeof(IntegratorQueueCounter));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
((MyDeviceMemory *)it.first)->device_copy_from__IntegratorQueueCounter();
|
|
||||||
|
|
||||||
if (IntegratorQueueCounter *queue_counter = (IntegratorQueueCounter *)
|
if (IntegratorQueueCounter *queue_counter = (IntegratorQueueCounter *)
|
||||||
it.first->host_pointer)
|
it.first->host_pointer)
|
||||||
{
|
{
|
||||||
@@ -701,11 +679,6 @@ bool MetalDeviceQueue::synchronize()
|
|||||||
|
|
||||||
[mtlCommandBuffer_ release];
|
[mtlCommandBuffer_ release];
|
||||||
|
|
||||||
for (const CopyBack &mmem : copy_back_mem_) {
|
|
||||||
memcpy((uchar *)mmem.host_pointer, (uchar *)mmem.gpu_mem, mmem.size);
|
|
||||||
}
|
|
||||||
copy_back_mem_.clear();
|
|
||||||
|
|
||||||
temp_buffer_pool_.process_command_buffer_completion(mtlCommandBuffer_);
|
temp_buffer_pool_.process_command_buffer_completion(mtlCommandBuffer_);
|
||||||
metal_device_->flush_delayed_free_list();
|
metal_device_->flush_delayed_free_list();
|
||||||
|
|
||||||
@@ -768,79 +741,13 @@ void MetalDeviceQueue::copy_to_device(device_memory &mem)
|
|||||||
|
|
||||||
assert(mem.device_pointer != 0);
|
assert(mem.device_pointer != 0);
|
||||||
assert(mem.host_pointer != nullptr);
|
assert(mem.host_pointer != nullptr);
|
||||||
|
/* No need to copy - Apple Silicon has Unified Memory Architecture. */
|
||||||
std::lock_guard<std::recursive_mutex> lock(metal_device_->metal_mem_map_mutex);
|
|
||||||
auto result = metal_device_->metal_mem_map.find(&mem);
|
|
||||||
if (result != metal_device_->metal_mem_map.end()) {
|
|
||||||
if (mem.host_pointer == mem.shared_pointer) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
MetalDevice::MetalMem &mmem = *result->second;
|
|
||||||
id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
|
|
||||||
|
|
||||||
id<MTLBuffer> buffer = temp_buffer_pool_.get_buffer(mtlDevice_,
|
|
||||||
mtlCommandBuffer_,
|
|
||||||
mmem.size,
|
|
||||||
MTLResourceStorageModeShared,
|
|
||||||
mem.host_pointer,
|
|
||||||
stats_);
|
|
||||||
|
|
||||||
[blitEncoder copyFromBuffer:buffer
|
|
||||||
sourceOffset:0
|
|
||||||
toBuffer:mmem.mtlBuffer
|
|
||||||
destinationOffset:mmem.offset
|
|
||||||
size:mmem.size];
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
metal_device_->mem_copy_to(mem);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDeviceQueue::copy_from_device(device_memory &mem)
|
void MetalDeviceQueue::copy_from_device(device_memory &mem)
|
||||||
{
|
{
|
||||||
@autoreleasepool {
|
/* No need to copy - Apple Silicon has Unified Memory Architecture. */
|
||||||
if (metal_device_->have_error()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(mem.type != MEM_GLOBAL && mem.type != MEM_TEXTURE);
|
|
||||||
|
|
||||||
if (mem.memory_size() == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(mem.device_pointer != 0);
|
|
||||||
assert(mem.host_pointer != nullptr);
|
|
||||||
|
|
||||||
std::lock_guard<std::recursive_mutex> lock(metal_device_->metal_mem_map_mutex);
|
|
||||||
MetalDevice::MetalMem &mmem = *metal_device_->metal_mem_map.at(&mem);
|
|
||||||
if (mmem.mtlBuffer) {
|
|
||||||
const size_t size = mem.memory_size();
|
|
||||||
|
|
||||||
if (mem.device_pointer) {
|
|
||||||
if ([mmem.mtlBuffer storageMode] == MTLStorageModeManaged) {
|
|
||||||
id<MTLBlitCommandEncoder> blitEncoder = get_blit_encoder();
|
|
||||||
[blitEncoder synchronizeResource:mmem.mtlBuffer];
|
|
||||||
}
|
|
||||||
if (mem.host_pointer != mmem.hostPtr) {
|
|
||||||
if (mtlCommandBuffer_) {
|
|
||||||
copy_back_mem_.push_back({mem.host_pointer, mmem.hostPtr, size});
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
memcpy((uchar *)mem.host_pointer, (uchar *)mmem.hostPtr, size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
memset((char *)mem.host_pointer, 0, size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
metal_device_->mem_copy_from(mem);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MetalDeviceQueue::prepare_resources(DeviceKernel /*kernel*/)
|
void MetalDeviceQueue::prepare_resources(DeviceKernel /*kernel*/)
|
||||||
|
|||||||
@@ -54,7 +54,6 @@ class MetalBufferPool {
|
|||||||
id<MTLBuffer> get_buffer(id<MTLDevice> device,
|
id<MTLBuffer> get_buffer(id<MTLDevice> device,
|
||||||
id<MTLCommandBuffer> command_buffer,
|
id<MTLCommandBuffer> command_buffer,
|
||||||
NSUInteger length,
|
NSUInteger length,
|
||||||
MTLResourceOptions options,
|
|
||||||
const void *pointer,
|
const void *pointer,
|
||||||
Stats &stats);
|
Stats &stats);
|
||||||
void process_command_buffer_completion(id<MTLCommandBuffer> command_buffer);
|
void process_command_buffer_completion(id<MTLCommandBuffer> command_buffer);
|
||||||
|
|||||||
@@ -95,7 +95,9 @@ const vector<id<MTLDevice>> &MetalInfo::get_usable_devices()
|
|||||||
strstr(device_name_char, "Apple"))
|
strstr(device_name_char, "Apple"))
|
||||||
{
|
{
|
||||||
/* TODO: Implement a better way to identify device vendor instead of relying on name. */
|
/* TODO: Implement a better way to identify device vendor instead of relying on name. */
|
||||||
usable = true;
|
/* We only support Apple Silicon GPUs which all have unified memory, but explicitly check
|
||||||
|
* just in case it ever changes. */
|
||||||
|
usable = [device hasUnifiedMemory];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -119,24 +121,15 @@ const vector<id<MTLDevice>> &MetalInfo::get_usable_devices()
|
|||||||
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
|
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
|
||||||
id<MTLCommandBuffer> command_buffer,
|
id<MTLCommandBuffer> command_buffer,
|
||||||
NSUInteger length,
|
NSUInteger length,
|
||||||
MTLResourceOptions options,
|
|
||||||
const void *pointer,
|
const void *pointer,
|
||||||
Stats &stats)
|
Stats &stats)
|
||||||
{
|
{
|
||||||
id<MTLBuffer> buffer = nil;
|
id<MTLBuffer> buffer = nil;
|
||||||
|
|
||||||
MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >>
|
|
||||||
MTLResourceStorageModeShift);
|
|
||||||
MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >>
|
|
||||||
MTLResourceCPUCacheModeShift);
|
|
||||||
|
|
||||||
{
|
{
|
||||||
thread_scoped_lock lock(buffer_mutex);
|
thread_scoped_lock lock(buffer_mutex);
|
||||||
/* Find an unused buffer with matching size and storage mode. */
|
/* Find an unused buffer with matching size and storage mode. */
|
||||||
for (MetalBufferListEntry &bufferEntry : temp_buffers) {
|
for (MetalBufferListEntry &bufferEntry : temp_buffers) {
|
||||||
if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode &&
|
if (bufferEntry.buffer.length == length && bufferEntry.command_buffer == nil) {
|
||||||
cpuCacheMode == bufferEntry.buffer.cpuCacheMode && bufferEntry.command_buffer == nil)
|
|
||||||
{
|
|
||||||
buffer = bufferEntry.buffer;
|
buffer = bufferEntry.buffer;
|
||||||
bufferEntry.command_buffer = command_buffer;
|
bufferEntry.command_buffer = command_buffer;
|
||||||
break;
|
break;
|
||||||
@@ -145,7 +138,7 @@ id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
|
|||||||
if (!buffer) {
|
if (!buffer) {
|
||||||
/* Create a new buffer and add it to the pool. Typically this pool will only grow to a
|
/* Create a new buffer and add it to the pool. Typically this pool will only grow to a
|
||||||
* handful of entries. */
|
* handful of entries. */
|
||||||
buffer = [device newBufferWithLength:length options:options];
|
buffer = [device newBufferWithLength:length options:MTLResourceStorageModeShared];
|
||||||
stats.mem_alloc(buffer.allocatedSize);
|
stats.mem_alloc(buffer.allocatedSize);
|
||||||
total_temp_mem_size += buffer.allocatedSize;
|
total_temp_mem_size += buffer.allocatedSize;
|
||||||
temp_buffers.push_back(MetalBufferListEntry{buffer, command_buffer});
|
temp_buffers.push_back(MetalBufferListEntry{buffer, command_buffer});
|
||||||
@@ -155,9 +148,6 @@ id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
|
|||||||
/* Copy over data */
|
/* Copy over data */
|
||||||
if (pointer) {
|
if (pointer) {
|
||||||
memcpy(buffer.contents, pointer, length);
|
memcpy(buffer.contents, pointer, length);
|
||||||
if (buffer.storageMode == MTLStorageModeManaged) {
|
|
||||||
[buffer didModifyRange:NSMakeRange(0, length)];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return buffer;
|
return buffer;
|
||||||
|
|||||||
Reference in New Issue
Block a user