Merge branch 'blender-v4.2-release'

Weizhen Huang
2024-07-08 16:19:41 +02:00
11 changed files with 242 additions and 181 deletions

View File

@@ -57,6 +57,9 @@ class BVHMetal : public BVH {
Geometry *const geom,
bool refit);
bool build_TLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
API_AVAILABLE(macos(11.0))
void set_accel_struct(id<MTLAccelerationStructure> new_accel_struct);
};
CCL_NAMESPACE_END
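The new set_accel_struct() declaration above shows the availability pattern used throughout this backend: annotate declarations with API_AVAILABLE so the compiler enforces OS-version checks at every call site, then guard bodies with @available. A minimal sketch of the pattern (hypothetical names; assumes manual retain/release, not ARC):

#import <Metal/Metal.h>

class AccelHolder {
 public:
  API_AVAILABLE(macos(11.0))
  void set_accel(id<MTLAccelerationStructure> accel);

 private:
  API_AVAILABLE(macos(11.0))
  id<MTLAccelerationStructure> accel_ = nil;
};

API_AVAILABLE(macos(11.0))
void AccelHolder::set_accel(id<MTLAccelerationStructure> accel)
{
  if (@available(macos 12.0, *)) {
    [accel_ release];        /* Safe on nil under manual retain/release. */
    accel_ = [accel retain]; /* Also nil-safe; keeps the new struct alive. */
  }
}

Note that the commit's setter instead takes ownership of an already-retained structure and balances the device memory stats; the sketch only illustrates the annotation and guard.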

View File

@@ -119,17 +119,27 @@ BVHMetal::BVHMetal(const BVHParams &params_,
BVHMetal::~BVHMetal()
{
/* Clear pointer used by enqueueing. */
device->release_bvh(this);
if (@available(macos 12.0, *)) {
set_accel_struct(nil);
if (null_BLAS) {
[null_BLAS release];
}
}
}
API_AVAILABLE(macos(11.0))
void BVHMetal::set_accel_struct(id<MTLAccelerationStructure> new_accel_struct)
{
if (@available(macos 12.0, *)) {
if (accel_struct) {
device->stats.mem_free(accel_struct.allocatedSize);
[accel_struct release];
accel_struct = nil;
}
if (null_BLAS) {
[null_BLAS release];
if (new_accel_struct) {
accel_struct = new_accel_struct;
device->stats.mem_alloc(accel_struct.allocatedSize);
}
}
}
@@ -325,9 +335,7 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
uint64_t allocated_size = [accel allocatedSize];
device->stats.mem_alloc(allocated_size);
accel_struct = accel;
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
@@ -338,10 +346,7 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress,
}
else {
/* set our acceleration structure to the uncompressed structure */
accel_struct = accel_uncompressed;
uint64_t allocated_size = [accel_struct allocatedSize];
device->stats.mem_alloc(allocated_size);
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
@@ -663,9 +668,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
uint64_t allocated_size = [accel allocatedSize];
device->stats.mem_alloc(allocated_size);
accel_struct = accel;
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
@@ -676,10 +679,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
}
else {
/* set our acceleration structure to the uncompressed structure */
accel_struct = accel_uncompressed;
uint64_t allocated_size = [accel_struct allocatedSize];
device->stats.mem_alloc(allocated_size);
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
@@ -910,9 +910,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
toAccelerationStructure:accel];
[accelEnc endEncoding];
[accelCommands addCompletedHandler:^(id<MTLCommandBuffer> /*command_buffer*/) {
uint64_t allocated_size = [accel allocatedSize];
device->stats.mem_alloc(allocated_size);
accel_struct = accel;
set_accel_struct(accel);
[accel_uncompressed release];
/* Signal that we've finished doing GPU acceleration struct build. */
@@ -923,10 +921,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
}
else {
/* set our acceleration structure to the uncompressed structure */
accel_struct = accel_uncompressed;
uint64_t allocated_size = [accel_struct allocatedSize];
device->stats.mem_alloc(allocated_size);
set_accel_struct(accel_uncompressed);
/* Signal that we've finished doing GPU acceleration struct build. */
g_bvh_build_throttler.release(wired_size);
@@ -1036,10 +1031,6 @@ bool BVHMetal::build_TLAS(Progress &progress,
for (Object *ob : objects) {
num_instances++;
/* Skip motion for non-traceable objects */
if (!ob->is_traceable())
continue;
if (ob->use_motion()) {
num_motion_transforms += max((size_t)1, ob->get_motion().size());
}
@@ -1115,8 +1106,8 @@ bool BVHMetal::build_TLAS(Progress &progress,
/* Skip non-traceable objects */
Geometry const *geom = ob->get_geometry();
BVHMetal const *blas = static_cast<BVHMetal const *>(geom->bvh);
if (!blas || !blas->accel_struct) {
/* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_mtl_device_index()
if (!blas || !blas->accel_struct || !ob->is_traceable()) {
/* Place a degenerate instance, to ensure [[instance_id]] equals ob->get_device_index()
* in our intersection functions */
blas = nullptr;
@@ -1299,11 +1290,8 @@ bool BVHMetal::build_TLAS(Progress &progress,
[instanceBuf release];
[scratchBuf release];
uint64_t allocated_size = [accel allocatedSize];
device->stats.mem_alloc(allocated_size);
/* Cache top and bottom-level acceleration structs */
accel_struct = accel;
set_accel_struct(accel);
unique_blas_array.clear();
unique_blas_array.reserve(all_blas.count);
@@ -1322,16 +1310,18 @@ bool BVHMetal::build(Progress &progress,
bool refit)
{
if (@available(macos 12.0, *)) {
if (refit && params.bvh_type != BVH_TYPE_STATIC) {
assert(accel_struct);
}
else {
if (accel_struct) {
device->stats.mem_free(accel_struct.allocatedSize);
[accel_struct release];
accel_struct = nil;
if (refit) {
/* It isn't valid to refit a non-existent BVH, or one which wasn't constructed as dynamic.
* In such cases, assert in development but try to recover in the wild. */
if (params.bvh_type != BVH_TYPE_DYNAMIC || !accel_struct) {
assert(false);
refit = false;
}
}
if (!refit) {
set_accel_struct(nil);
}
}
@autoreleasepool {
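The refit validation above replaces a bare assert with a recoverable fallback. A standalone sketch of that policy (hypothetical harness, not the shipped code):

#include <cassert>

enum BVHType { BVH_TYPE_DYNAMIC, BVH_TYPE_STATIC };

/* Demote an invalid refit request to a full rebuild instead of crashing. */
static bool validate_refit(bool refit, BVHType bvh_type, bool has_accel_struct)
{
  if (refit && (bvh_type != BVH_TYPE_DYNAMIC || !has_accel_struct)) {
    assert(false); /* Loud in development builds... */
    refit = false; /* ...graceful full rebuild in the wild. */
  }
  return refit;
}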

View File

@@ -39,10 +39,19 @@ class MetalDevice : public Device {
KernelParamsMetal launch_params = {0};
/* MetalRT members ----------------------------------*/
BVHMetal *bvhMetalRT = nullptr;
bool use_metalrt = false;
bool motion_blur = false;
id<MTLArgumentEncoder> mtlASArgEncoder =
nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */
id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
id<MTLBuffer> blas_buffer = nil;
API_AVAILABLE(macos(11.0))
vector<id<MTLAccelerationStructure>> unique_blas_array;
API_AVAILABLE(macos(11.0))
id<MTLAccelerationStructure> accel_struct = nil;
/*---------------------------------------------------*/
uint kernel_features;
@@ -79,11 +88,6 @@ class MetalDevice : public Device {
id<MTLBuffer> texture_bindings_3d = nil;
std::vector<id<MTLTexture>> texture_slot_map;
/* BLAS encoding & lookup */
id<MTLArgumentEncoder> mtlBlasArgEncoder = nil;
id<MTLBuffer> blas_buffer = nil;
bool use_metalrt = false;
MetalPipelineType kernel_specialization_level = PSO_GENERIC;
int device_id = 0;
@@ -138,8 +142,6 @@ class MetalDevice : public Device {
virtual void build_bvh(BVH *bvh, Progress &progress, bool refit) override;
virtual void release_bvh(BVH *bvh) override;
virtual void optimize_for_scene(Scene *scene) override;
static void compile_and_load(int device_id, MetalPipelineType pso_type);
@@ -184,6 +186,10 @@ class MetalDevice : public Device {
void tex_free(device_texture &mem);
void flush_delayed_free_list();
void free_bvh();
void update_bvh(BVHMetal *bvh_metal);
};
CCL_NAMESPACE_END
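With these declarations the device keeps its own retained copies of the TLAS and the unique BLAS array, rather than reaching through bvhMetalRT at dispatch time. A sketch of duplicating ownership of a resource array under manual retain/release (hypothetical helpers):

#import <Metal/Metal.h>
#include <vector>

using BlasArray = std::vector<id<MTLAccelerationStructure>>;

API_AVAILABLE(macos(11.0))
static BlasArray copy_with_retain(const BlasArray &src)
{
  BlasArray dst = src; /* Copy the pointers... */
  for (id<MTLAccelerationStructure> blas : dst) {
    [blas retain];     /* ...and take a reference on each one. */
  }
  return dst;
}

API_AVAILABLE(macos(11.0))
static void release_all(BlasArray &arr)
{
  for (id<MTLAccelerationStructure> blas : arr) {
    [blas release];
  }
  arr.clear();
}

This way either owner can be destroyed first without invalidating the other's references.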

View File

@@ -267,6 +267,7 @@ MetalDevice::~MetalDevice()
}
}
free_bvh();
flush_delayed_free_list();
if (texture_bindings_2d) {
@@ -1372,24 +1373,7 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
if (bvh_metal->build(progress, mtlDevice, mtlGeneralCommandQueue, refit)) {
if (bvh->params.top_level) {
bvhMetalRT = bvh_metal;
// allocate required buffers for BLAS array
uint64_t count = bvhMetalRT->blas_array.size();
uint64_t bufferSize = mtlBlasArgEncoder.encodedLength * count;
blas_buffer = [mtlDevice newBufferWithLength:bufferSize options:default_storage_mode];
stats.mem_alloc(blas_buffer.allocatedSize);
for (uint64_t i = 0; i < count; ++i) {
if (bvhMetalRT->blas_array[i]) {
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer
offset:i * mtlBlasArgEncoder.encodedLength];
[mtlBlasArgEncoder setAccelerationStructure:bvhMetalRT->blas_array[i] atIndex:0];
}
}
if (default_storage_mode == MTLResourceStorageModeManaged) {
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
}
update_bvh(bvh_metal);
}
}
@@ -1399,10 +1383,54 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit)
}
}
void MetalDevice::release_bvh(BVH *bvh)
void MetalDevice::free_bvh()
{
if (bvhMetalRT == bvh) {
bvhMetalRT = nullptr;
for (id<MTLAccelerationStructure> &blas : unique_blas_array) {
[blas release];
}
unique_blas_array.clear();
if (blas_buffer) {
[blas_buffer release];
blas_buffer = nil;
}
if (accel_struct) {
[accel_struct release];
accel_struct = nil;
}
}
void MetalDevice::update_bvh(BVHMetal *bvh_metal)
{
free_bvh();
if (!bvh_metal) {
return;
}
accel_struct = bvh_metal->accel_struct;
unique_blas_array = bvh_metal->unique_blas_array;
[accel_struct retain];
for (id<MTLAccelerationStructure> &blas : unique_blas_array) {
[blas retain];
}
// Allocate required buffers for BLAS array.
uint64_t count = bvh_metal->blas_array.size();
uint64_t buffer_size = mtlBlasArgEncoder.encodedLength * count;
blas_buffer = [mtlDevice newBufferWithLength:buffer_size options:default_storage_mode];
stats.mem_alloc(blas_buffer.allocatedSize);
for (uint64_t i = 0; i < count; ++i) {
if (bvh_metal->blas_array[i]) {
[mtlBlasArgEncoder setArgumentBuffer:blas_buffer offset:i * mtlBlasArgEncoder.encodedLength];
[mtlBlasArgEncoder setAccelerationStructure:bvh_metal->blas_array[i] atIndex:0];
}
}
if (default_storage_mode == MTLResourceStorageModeManaged) {
[blas_buffer didModifyRange:NSMakeRange(0, blas_buffer.length)];
}
}
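update_bvh() above fills the BLAS argument buffer with the strided encoder pattern: one encoded slot per BLAS, packed at encodedLength intervals. A condensed sketch of just that pattern (hypothetical free function; shared storage for brevity, whereas managed storage would also need the didModifyRange call shown above):

#import <Metal/Metal.h>
#include <vector>

API_AVAILABLE(macos(12.0))
static id<MTLBuffer> encode_blas_slots(
    id<MTLDevice> device,
    id<MTLArgumentEncoder> encoder,
    const std::vector<id<MTLAccelerationStructure>> &blas_array)
{
  const NSUInteger stride = encoder.encodedLength;
  id<MTLBuffer> buffer = [device newBufferWithLength:stride * blas_array.size()
                                             options:MTLResourceStorageModeShared];
  for (NSUInteger i = 0; i < blas_array.size(); i++) {
    if (blas_array[i]) {
      [encoder setArgumentBuffer:buffer offset:i * stride]; /* Select slot i. */
      [encoder setAccelerationStructure:blas_array[i] atIndex:0];
    }
  }
  return buffer;
}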

View File

@@ -54,10 +54,12 @@ enum MetalPipelineType {
const char *kernel_type_as_string(MetalPipelineType pso_type);
struct MetalKernelPipeline {
/* A pipeline object that can be shared between multiple instances of MetalDeviceQueue. */
class MetalKernelPipeline {
public:
void compile();
int pipeline_id;
int originating_device_id;
id<MTLLibrary> mtlLibrary = nil;
@@ -83,6 +85,28 @@ struct MetalKernelPipeline {
string error_str;
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
};
/* An actively instanced pipeline that can only be used by a single instance of MetalDeviceQueue.
*/
class MetalDispatchPipeline {
public:
~MetalDispatchPipeline();
bool update(MetalDevice *metal_device, DeviceKernel kernel);
void free_intersection_function_tables();
private:
friend class MetalDeviceQueue;
friend struct ShaderCache;
int pipeline_id = -1;
MetalPipelineType pso_type;
id<MTLComputePipelineState> pipeline = nil;
int num_threads_per_block = 0;
API_AVAILABLE(macos(11.0))
id<MTLIntersectionFunctionTable> intersection_func_table[METALRT_TABLE_NUM] = {nil};
};
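The split above separates the shared, cached MetalKernelPipeline (compiled once, possibly in the background) from a per-queue MetalDispatchPipeline that owns its own intersection function tables. A minimal sketch of the id-based refresh this enables (hypothetical stand-in types):

struct SharedPipeline { /* stands in for MetalKernelPipeline */
  int pipeline_id = 0;
};

struct ActivePipeline { /* stands in for MetalDispatchPipeline */
  int pipeline_id = -1;

  bool update(const SharedPipeline *best)
  {
    if (!best) {
      return false; /* Nothing compiled for this kernel yet. */
    }
    if (pipeline_id == best->pipeline_id) {
      return true; /* Already current; skip rebuilding function tables. */
    }
    pipeline_id = best->pipeline_id;
    /* ...adopt the PSO and rebuild the per-queue function tables here... */
    return true;
  }
};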

View File

@@ -133,6 +133,9 @@ using DeviceShaderCache = std::pair<id<MTLDevice>, unique_ptr<ShaderCache>>;
int g_shaderCacheCount = 0;
DeviceShaderCache g_shaderCache[MAX_POSSIBLE_GPUS_ON_SYSTEM];
/* Next UID for associating a MetalDispatchPipeline with an originating MetalKernelPipeline. */
static std::atomic_int g_next_pipeline_id = 0;
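The process-wide counter gives every compiled pipeline a unique, monotonically increasing id, and consumers compare ids rather than pointers, so a pipeline recompiled at a reused address is still detected as new. The idiom in isolation (hypothetical names):

#include <atomic>

static std::atomic_int g_next_id = 0;

struct Versioned {
  int id = g_next_id.fetch_add(1); /* Unique per instance, never reused. */
};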
ShaderCache *get_shader_cache(id<MTLDevice> mtlDevice)
{
for (int i = 0; i < g_shaderCacheCount; i++) {
@@ -325,6 +328,7 @@ void ShaderCache::load_kernel(DeviceKernel device_kernel,
/* Keep track of the originating device's ID so that we can cancel requests if the device ceases
* to be active. */
pipeline->pipeline_id = g_next_pipeline_id.fetch_add(1);
pipeline->originating_device_id = device->device_id;
memcpy(&pipeline->kernel_data_, &device->launch_params.data, sizeof(pipeline->kernel_data_));
pipeline->pso_type = pso_type;
@@ -450,6 +454,64 @@ static MTLFunctionConstantValues *GetConstantValues(KernelData const *data = nul
return constant_values;
}
void MetalDispatchPipeline::free_intersection_function_tables()
{
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
if (intersection_func_table[table]) {
[intersection_func_table[table] release];
intersection_func_table[table] = nil;
}
}
}
MetalDispatchPipeline::~MetalDispatchPipeline()
{
free_intersection_function_tables();
}
bool MetalDispatchPipeline::update(MetalDevice *metal_device, DeviceKernel kernel)
{
const MetalKernelPipeline *best_pipeline = MetalDeviceKernels::get_best_pipeline(metal_device,
kernel);
if (!best_pipeline) {
return false;
}
if (pipeline_id == best_pipeline->pipeline_id) {
/* The best pipeline is already active - nothing to do. */
return true;
}
pipeline_id = best_pipeline->pipeline_id;
pipeline = best_pipeline->pipeline;
pso_type = best_pipeline->pso_type;
num_threads_per_block = best_pipeline->num_threads_per_block;
/* Create the MTLIntersectionFunctionTables if needed. */
if (best_pipeline->use_metalrt && device_kernel_has_intersection(best_pipeline->device_kernel)) {
free_intersection_function_tables();
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
@autoreleasepool {
MTLIntersectionFunctionTableDescriptor *ift_desc =
[[MTLIntersectionFunctionTableDescriptor alloc] init];
ift_desc.functionCount = best_pipeline->table_functions[table].count;
intersection_func_table[table] = [this->pipeline
newIntersectionFunctionTableWithDescriptor:ift_desc];
/* Finally write the function handles into this pipeline's table */
int size = int([best_pipeline->table_functions[table] count]);
for (int i = 0; i < size; i++) {
id<MTLFunctionHandle> handle = [pipeline
functionHandleWithFunction:best_pipeline->table_functions[table][i]];
[intersection_func_table[table] setFunction:handle atIndex:i];
}
}
}
}
return true;
}
id<MTLFunction> MetalKernelPipeline::make_intersection_function(const char *function_name)
{
MTLFunctionDescriptor *desc = [MTLIntersectionFunctionDescriptor functionDescriptor];
@@ -507,7 +569,6 @@ void MetalKernelPipeline::compile()
function.label = [@(function_name.c_str()) copy];
NSArray *table_functions[METALRT_TABLE_NUM] = {nil};
NSArray *linked_functions = nil;
if (use_metalrt && device_kernel_has_intersection(device_kernel)) {
@@ -754,24 +815,6 @@ void MetalKernelPipeline::compile()
[computePipelineStateDescriptor release];
computePipelineStateDescriptor = nil;
if (use_metalrt && linked_functions) {
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
MTLIntersectionFunctionTableDescriptor *ift_desc =
[[MTLIntersectionFunctionTableDescriptor alloc] init];
ift_desc.functionCount = table_functions[table].count;
intersection_func_table[table] = [this->pipeline
newIntersectionFunctionTableWithDescriptor:ift_desc];
/* Finally write the function handles into this pipeline's table */
int size = (int)[table_functions[table] count];
for (int i = 0; i < size; i++) {
id<MTLFunctionHandle> handle = [pipeline
functionHandleWithFunction:table_functions[table][i]];
[intersection_func_table[table] setFunction:handle atIndex:i];
}
}
}
if (!use_binary_archive) {
metal_printf("%16s | %2d | %-55s | %7.2fs\n",
kernel_type_as_string(pso_type),

View File

@@ -66,6 +66,7 @@ class MetalDeviceQueue : public DeviceQueue {
id<MTLSharedEvent> shared_event_ = nil;
API_AVAILABLE(macos(10.14), ios(14.0))
MTLSharedEventListener *shared_event_listener_ = nil;
MetalDispatchPipeline active_pipelines_[DEVICE_KERNEL_NUM];
dispatch_queue_t event_queue_;
dispatch_semaphore_t wait_semaphore_;

View File

@@ -465,13 +465,12 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
bytes_written = globals_offsets + sizeof(KernelParamsMetal);
const MetalKernelPipeline *metal_kernel_pso = MetalDeviceKernels::get_best_pipeline(
metal_device_, kernel);
if (!metal_kernel_pso) {
if (!active_pipelines_[kernel].update(metal_device_, kernel)) {
metal_device_->set_error(
string_printf("No MetalKernelPipeline for %s\n", device_kernel_as_string(kernel)));
string_printf("Could not activate pipeline for %s\n", device_kernel_as_string(kernel)));
return false;
}
MetalDispatchPipeline &active_pipeline = active_pipelines_[kernel];
/* Encode ancillaries */
[metal_device_->mtlAncillaryArgEncoder setArgumentBuffer:arg_buffer offset:metal_offsets];
@@ -487,8 +486,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
if (@available(macos 12.0, *)) {
if (metal_device_->use_metalrt && device_kernel_has_intersection(kernel)) {
if (metal_device_->bvhMetalRT) {
id<MTLAccelerationStructure> accel_struct = metal_device_->bvhMetalRT->accel_struct;
if (id<MTLAccelerationStructure> accel_struct = metal_device_->accel_struct) {
[metal_device_->mtlAncillaryArgEncoder setAccelerationStructure:accel_struct atIndex:3];
[metal_device_->mtlAncillaryArgEncoder setBuffer:metal_device_->blas_buffer
offset:0
@@ -496,14 +494,14 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
}
for (int table = 0; table < METALRT_TABLE_NUM; table++) {
if (metal_kernel_pso->intersection_func_table[table]) {
[metal_kernel_pso->intersection_func_table[table] setBuffer:arg_buffer
offset:globals_offsets
atIndex:1];
if (active_pipeline.intersection_func_table[table]) {
[active_pipeline.intersection_func_table[table] setBuffer:arg_buffer
offset:globals_offsets
atIndex:1];
[metal_device_->mtlAncillaryArgEncoder
setIntersectionFunctionTable:metal_kernel_pso->intersection_func_table[table]
setIntersectionFunctionTable:active_pipeline.intersection_func_table[table]
atIndex:4 + table];
[mtlComputeCommandEncoder useResource:metal_kernel_pso->intersection_func_table[table]
[mtlComputeCommandEncoder useResource:active_pipeline.intersection_func_table[table]
usage:MTLResourceUsageRead];
}
else {
@@ -526,24 +524,22 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
if (metal_device_->use_metalrt && device_kernel_has_intersection(kernel)) {
if (@available(macos 12.0, *)) {
BVHMetal *bvhMetalRT = metal_device_->bvhMetalRT;
if (bvhMetalRT && bvhMetalRT->accel_struct) {
if (id<MTLAccelerationStructure> accel_struct = metal_device_->accel_struct) {
/* Mark all acceleration structure resources as used. */
[mtlComputeCommandEncoder useResource:bvhMetalRT->accel_struct
usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResource:accel_struct usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResource:metal_device_->blas_buffer
usage:MTLResourceUsageRead];
[mtlComputeCommandEncoder useResources:bvhMetalRT->unique_blas_array.data()
count:bvhMetalRT->unique_blas_array.size()
[mtlComputeCommandEncoder useResources:metal_device_->unique_blas_array.data()
count:metal_device_->unique_blas_array.size()
usage:MTLResourceUsageRead];
}
}
}
[mtlComputeCommandEncoder setComputePipelineState:metal_kernel_pso->pipeline];
[mtlComputeCommandEncoder setComputePipelineState:active_pipeline.pipeline];
/* Compute kernel launch parameters. */
const int num_threads_per_block = metal_kernel_pso->num_threads_per_block;
const int num_threads_per_block = active_pipeline.num_threads_per_block;
int shared_mem_bytes = 0;
@@ -594,7 +590,7 @@ bool MetalDeviceQueue::enqueue(DeviceKernel kernel,
const char *errCStr = [[NSString stringWithFormat:@"%@", command_buffer.error]
UTF8String];
str += string_printf("(%s.%s):\n%s\n",
kernel_type_as_string(metal_kernel_pso->pso_type),
kernel_type_as_string(active_pipeline.pso_type),
device_kernel_as_string(kernel),
errCStr);
}
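Several bindings above use the if-with-initializer form so the Metal object is bound, nil-checked, and scoped in a single statement. A small sketch (hypothetical helper):

#import <Metal/Metal.h>

API_AVAILABLE(macos(11.0))
static void mark_accel_used(id<MTLComputeCommandEncoder> encoder,
                            id<MTLAccelerationStructure> device_accel)
{
  if (id<MTLAccelerationStructure> accel = device_accel) {
    /* Only reached when the device actually holds an acceleration structure. */
    [encoder useResource:accel usage:MTLResourceUsageRead];
  }
}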

View File

@@ -41,23 +41,14 @@ struct MetalInfo {
/* Pool of MTLBuffers whose lifetime is linked to a single MTLCommandBuffer */
class MetalBufferPool {
struct MetalBufferListEntry {
MetalBufferListEntry(id<MTLBuffer> buffer, id<MTLCommandBuffer> command_buffer)
: buffer(buffer), command_buffer(command_buffer)
{
}
MetalBufferListEntry() = delete;
id<MTLBuffer> buffer;
id<MTLCommandBuffer> command_buffer;
};
std::vector<MetalBufferListEntry> buffer_free_list;
std::vector<MetalBufferListEntry> buffer_in_use_list;
std::vector<MetalBufferListEntry> temp_buffers;
thread_mutex buffer_mutex;
size_t total_temp_mem_size = 0;
public:
MetalBufferPool() = default;
~MetalBufferPool();
id<MTLBuffer> get_buffer(id<MTLDevice> device,

View File

@@ -123,53 +123,42 @@ id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
const void *pointer,
Stats &stats)
{
id<MTLBuffer> buffer;
id<MTLBuffer> buffer = nil;
MTLStorageMode storageMode = MTLStorageMode((options & MTLResourceStorageModeMask) >>
MTLResourceStorageModeShift);
MTLCPUCacheMode cpuCacheMode = MTLCPUCacheMode((options & MTLResourceCPUCacheModeMask) >>
MTLResourceCPUCacheModeShift);
buffer_mutex.lock();
for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end(); entry++) {
MetalBufferListEntry bufferEntry = *entry;
/* Check if buffer matches size and storage mode and is old enough to reuse */
if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode &&
cpuCacheMode == bufferEntry.buffer.cpuCacheMode)
{
buffer = bufferEntry.buffer;
buffer_free_list.erase(entry);
bufferEntry.command_buffer = command_buffer;
buffer_in_use_list.push_back(bufferEntry);
buffer_mutex.unlock();
/* Copy over data */
if (pointer) {
memcpy(buffer.contents, pointer, length);
if (bufferEntry.buffer.storageMode == MTLStorageModeManaged) {
[buffer didModifyRange:NSMakeRange(0, length)];
}
{
thread_scoped_lock lock(buffer_mutex);
/* Find an unused buffer with matching size and storage mode. */
for (MetalBufferListEntry &bufferEntry : temp_buffers) {
if (bufferEntry.buffer.length == length && storageMode == bufferEntry.buffer.storageMode &&
cpuCacheMode == bufferEntry.buffer.cpuCacheMode && bufferEntry.command_buffer == nil)
{
buffer = bufferEntry.buffer;
bufferEntry.command_buffer = command_buffer;
break;
}
return buffer;
}
if (!buffer) {
/* Create a new buffer and add it to the pool. Typically this pool will only grow to a
* handful of entries. */
buffer = [device newBufferWithLength:length options:options];
stats.mem_alloc(buffer.allocatedSize);
total_temp_mem_size += buffer.allocatedSize;
temp_buffers.push_back(MetalBufferListEntry{buffer, command_buffer});
}
}
// NSLog(@"Creating buffer of length %lu (%lu)", length, frameCount);
/* Copy over data */
if (pointer) {
buffer = [device newBufferWithBytes:pointer length:length options:options];
memcpy(buffer.contents, pointer, length);
if (buffer.storageMode == MTLStorageModeManaged) {
[buffer didModifyRange:NSMakeRange(0, length)];
}
}
else {
buffer = [device newBufferWithLength:length options:options];
}
MetalBufferListEntry buffer_entry(buffer, command_buffer);
stats.mem_alloc(buffer.allocatedSize);
total_temp_mem_size += buffer.allocatedSize;
buffer_in_use_list.push_back(buffer_entry);
buffer_mutex.unlock();
return buffer;
}
@@ -178,16 +167,10 @@ void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> com
{
assert(command_buffer);
thread_scoped_lock lock(buffer_mutex);
/* Release all buffers that have not been recently reused back into the free pool */
for (auto entry = buffer_in_use_list.begin(); entry != buffer_in_use_list.end();) {
MetalBufferListEntry buffer_entry = *entry;
/* Mark any temp buffers associated with command_buffer as unused. */
for (MetalBufferListEntry &buffer_entry : temp_buffers) {
if (buffer_entry.command_buffer == command_buffer) {
entry = buffer_in_use_list.erase(entry);
buffer_entry.command_buffer = nil;
buffer_free_list.push_back(buffer_entry);
}
else {
entry++;
}
}
}
@@ -196,16 +179,12 @@ MetalBufferPool::~MetalBufferPool()
{
thread_scoped_lock lock(buffer_mutex);
/* Release all remaining buffers. */
for (auto entry = buffer_free_list.begin(); entry != buffer_free_list.end();) {
MetalBufferListEntry buffer_entry = *entry;
id<MTLBuffer> buffer = buffer_entry.buffer;
// NSLog(@"Releasing buffer of length %lu (%lu) (%lu outstanding)", buffer.length, frameCount,
// bufferFreeList.size());
total_temp_mem_size -= buffer.allocatedSize;
[buffer release];
entry = buffer_free_list.erase(entry);
for (MetalBufferListEntry &buffer_entry : temp_buffers) {
total_temp_mem_size -= buffer_entry.buffer.allocatedSize;
[buffer_entry.buffer release];
buffer_entry.buffer = nil;
}
temp_buffers.clear();
}
CCL_NAMESPACE_END
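The pool now keeps one temp_buffers list instead of separate free/in-use lists: an entry is busy while its command_buffer tag is set and free when the tag is nil. A simplified sketch of that lifecycle (hypothetical types; locking and stats omitted):

#import <Metal/Metal.h>
#include <vector>

struct Entry {
  id<MTLBuffer> buffer = nil;
  id<MTLCommandBuffer> owner = nil; /* nil means "free to reuse". */
};

static id<MTLBuffer> acquire(std::vector<Entry> &pool,
                             id<MTLDevice> device,
                             id<MTLCommandBuffer> cmd,
                             size_t length)
{
  for (Entry &e : pool) {
    if (e.owner == nil && e.buffer.length == length) {
      e.owner = cmd; /* Reuse a free entry of the right size. */
      return e.buffer;
    }
  }
  Entry e;
  e.buffer = [device newBufferWithLength:length
                                 options:MTLResourceStorageModeShared];
  e.owner = cmd;
  pool.push_back(e); /* Grow the pool; typically it stays small. */
  return e.buffer;
}

static void on_complete(std::vector<Entry> &pool, id<MTLCommandBuffer> cmd)
{
  for (Entry &e : pool) {
    if (e.owner == cmd) {
      e.owner = nil; /* Back to the free state for later reuse. */
    }
  }
}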

View File

@@ -989,11 +989,12 @@ ccl_device_forceinline bool integrate_volume_phase_scatter(
ccl_device VolumeIntegrateEvent volume_integrate(KernelGlobals kg,
IntegratorState state,
ccl_private Ray *ccl_restrict ray,
const int object,
ccl_global float *ccl_restrict render_buffer)
{
ShaderData sd;
shader_setup_from_volume(kg, &sd, ray, object);
/* FIXME: `object` is used for light linking. We read the bottom of the stack for simplicity, but
* this does not work for overlapping volumes. */
shader_setup_from_volume(kg, &sd, ray, INTEGRATOR_STATE_ARRAY(state, volume_stack, 0, object));
/* Load random number state. */
RNGState rng_state;
@@ -1186,8 +1187,7 @@ ccl_device void integrator_shade_volume(KernelGlobals kg,
volume_stack_clean(kg, state);
}
const VolumeIntegrateEvent event = volume_integrate(
kg, state, &ray, isect.object, render_buffer);
const VolumeIntegrateEvent event = volume_integrate(kg, state, &ray, render_buffer);
if (event == VOLUME_PATH_MISSED) {
/* End path. */
integrator_path_terminate(kg, state, DEVICE_KERNEL_INTEGRATOR_SHADE_VOLUME);
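For context on the FIXME above: light linking needs a single object to resolve its link set, but with overlapping volumes the stack holds several candidates, so the change reads the bottom-most entry (index 0) as an approximation. In plain terms (hypothetical types; the real code goes through the INTEGRATOR_STATE_ARRAY accessor):

struct VolumeStackEntry {
  int object; /* Object owning this volume. */
  int shader; /* Volume shader to evaluate. */
};

/* Bottom of the stack: exact for a single enclosing volume, only an
 * approximation when volumes overlap (see the FIXME above). */
static int volume_object_for_light_linking(const VolumeStackEntry *stack)
{
  return stack[0].object;
}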