Files
test/intern/cycles/device/metal/util.mm
Michael Jones 584f19a5af Cycles: Apple Silicon tidy: Remove non-UMA codepaths (v2)
This PR removes a bunch of dead code following #123551 (removal of AMD and Intel GPU support). It is safe to assume that UMA will be available, so a lot of codepaths that dealt with copying between CPU and GPU are now just clutter.

Pull Request: https://projects.blender.org/blender/blender/pulls/136146
2025-03-19 12:53:01 +01:00

183 lines
5.4 KiB
Plaintext

/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#ifdef WITH_METAL
# include "device/metal/util.h"
# include "device/metal/device_impl.h"
# include "util/md5.h"
# include "util/path.h"
# include "util/string.h"
# include "util/time.h"
# include <IOKit/IOKitLib.h>
# include <ctime>
# include <pwd.h>
# include <sys/shm.h>
CCL_NAMESPACE_BEGIN
/* Build a human-readable name for `device`: the Metal device name followed by a " (GPU ...)"
 * suffix. The GPU core count is included in the suffix when it is non-zero, so that GPU variants
 * can be told apart in benchmarks. */
string MetalInfo::get_device_name(id<MTLDevice> device)
{
  string result = [device.name UTF8String];

  const int num_cores = get_apple_gpu_core_count(device);
  if (num_cores) {
    result += string_printf(" (GPU - %d cores)", num_cores);
  }
  else {
    /* A core count of zero means it could not be queried; omit it from the name. */
    result += " (GPU)";
  }

  return result;
}
/* Look up the "gpu-core-count" property on the IORegistry entry whose registry ID matches
 * `device.registryID`.
 *
 * Returns the core count, or 0 when running on macOS older than 12.0, when no matching registry
 * entry is found, or when the property is missing or is not a number. */
int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
  int core_count = 0;
  if (@available(macos 12.0, *)) {
    /* The matching dictionary is consumed by IOServiceGetMatchingService, so it needs no
     * release here. The returned service object however is owned by us. */
    io_service_t gpu_service = IOServiceGetMatchingService(
        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
    if (gpu_service == IO_OBJECT_NULL) {
      return core_count;
    }

    if (CFTypeRef property = IORegistryEntryCreateCFProperty(
            gpu_service, CFSTR("gpu-core-count"), nullptr, 0))
    {
      /* Only interpret the property as a number after verifying its CF type. */
      if (CFGetTypeID(property) == CFNumberGetTypeID()) {
        CFNumberGetValue((CFNumberRef)property, kCFNumberSInt32Type, &core_count);
      }
      CFRelease(property);
    }

    /* Release the service object, otherwise it leaks on every call. */
    IOObjectRelease(gpu_service);
  }
  return core_count;
}
/* Classify `device` into an AppleGPUArchitecture by searching its name for "M1", "M2" or "M3"
 * (checked in that order). M2 devices are further split by GPU core count: more than 10 cores
 * maps to APPLE_M2_BIG. Names matching none of the substrings yield APPLE_UNKNOWN. */
AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
  const char *name = [device.name UTF8String];
  const auto name_contains = [name](const char *token) -> bool {
    return strstr(name, token) != nullptr;
  };

  AppleGPUArchitecture architecture = APPLE_UNKNOWN;
  if (name_contains("M1")) {
    architecture = APPLE_M1;
  }
  else if (name_contains("M2")) {
    /* The core count is only queried for M2 devices. */
    const bool is_big = get_apple_gpu_core_count(device) > 10;
    architecture = is_big ? APPLE_M2_BIG : APPLE_M2;
  }
  else if (name_contains("M3")) {
    architecture = APPLE_M3;
  }
  return architecture;
}
/* Number of elements per partition to use when sorting active indices.
 *
 * The CYCLES_METAL_SORT_PARTITION_ELEMENTS environment variable, when set, overrides the default
 * (its value is parsed with atoi). */
int MetalInfo::optimal_sort_partition_elements()
{
  /* On M1 and M2 GPUs, we see better cache utilization if we partition the active indices before
   * sorting each partition by material. Partitioning into chunks of 65536 elements results in an
   * overall render time speedup of up to 15%. */
  const char *override_value = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS");
  return override_value ? atoi(override_value) : 65536;
}
/* Return the list of Metal devices that Cycles can render on.
 *
 * Devices are enumerated on the first call only; the result is cached in function-local statics
 * and returned by reference on subsequent calls. No locking is performed around the cache.
 *
 * A device is considered usable when running on macOS 12.2 or newer, its name contains "Apple"
 * but neither "Intel" nor "AMD", and it reports unified memory. Each usable device is retained
 * before being stored in the cached list. */
const vector<id<MTLDevice>> &MetalInfo::get_usable_devices()
{
  static vector<id<MTLDevice>> usable_devices;
  static bool already_enumerated = false;

  if (already_enumerated) {
    return usable_devices;
  }

  metal_printf("Usable Metal devices:\n");

  /* MTLCopyAllDevices() returns an array that we own, so keep a handle to it and release it once
   * the devices we want have been retained individually. */
  NSArray<id<MTLDevice>> *all_devices = MTLCopyAllDevices();
  for (id<MTLDevice> device in all_devices) {
    string device_name = get_device_name(device);
    bool usable = false;

    if (@available(macos 12.2, *)) {
      const char *device_name_char = [device.name UTF8String];
      if (!(strstr(device_name_char, "Intel") || strstr(device_name_char, "AMD")) &&
          strstr(device_name_char, "Apple"))
      {
        /* TODO: Implement a better way to identify device vendor instead of relying on name. */
        /* We only support Apple Silicon GPUs which all have unified memory, but explicitly check
         * just in case it ever changes. */
        usable = [device hasUnifiedMemory];
      }
    }

    if (usable) {
      metal_printf("- %s\n", device_name.c_str());
      /* Take our own reference so the device outlives the enumeration array. */
      [device retain];
      usable_devices.push_back(device);
    }
    else {
      metal_printf(" (skipping \"%s\")\n", device_name.c_str());
    }
  }
  [all_devices release];

  if (usable_devices.empty()) {
    metal_printf(" No usable Metal devices found\n");
  }
  already_enumerated = true;

  return usable_devices;
}
/* Fetch a temporary shared-storage buffer of exactly `length` bytes and associate it with
 * `command_buffer`.
 *
 * An unused pooled buffer of the same length is reused when available; otherwise a new buffer is
 * allocated on `device`, accounted for in `stats` and added to the pool. When `pointer` is
 * non-null, `length` bytes are copied from it into the buffer contents.
 *
 * Returns the buffer, or nil if a new buffer was needed and the allocation failed. */
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
                                          id<MTLCommandBuffer> command_buffer,
                                          NSUInteger length,
                                          const void *pointer,
                                          Stats &stats)
{
  id<MTLBuffer> buffer = nil;
  {
    thread_scoped_lock lock(buffer_mutex);
    /* Find an unused buffer with matching size. An entry is unused when it has no command
     * buffer associated with it. */
    for (MetalBufferListEntry &bufferEntry : temp_buffers) {
      if (bufferEntry.buffer.length == length && bufferEntry.command_buffer == nil) {
        buffer = bufferEntry.buffer;
        bufferEntry.command_buffer = command_buffer;
        break;
      }
    }
    if (!buffer) {
      /* Create a new buffer and add it to the pool. Typically this pool will only grow to a
       * handful of entries. */
      buffer = [device newBufferWithLength:length options:MTLResourceStorageModeShared];
      if (!buffer) {
        /* Allocation failed. Do not record a nil entry in the pool, and do not fall through to
         * the copy below, which would write through a null contents pointer. */
        return nil;
      }
      stats.mem_alloc(buffer.allocatedSize);
      total_temp_mem_size += buffer.allocatedSize;
      temp_buffers.push_back(MetalBufferListEntry{buffer, command_buffer});
    }
  }
  /* Copy over data. This happens outside the lock. */
  if (pointer) {
    memcpy(buffer.contents, pointer, length);
  }
  return buffer;
}
/* Mark every pooled buffer currently associated with `command_buffer` as unused, by clearing the
 * entry's command buffer. `command_buffer` must be non-nil (asserted). */
void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
  assert(command_buffer);
  thread_scoped_lock lock(buffer_mutex);

  for (auto entry = temp_buffers.begin(); entry != temp_buffers.end(); ++entry) {
    if (entry->command_buffer != command_buffer) {
      /* Belongs to a different command buffer, or is already unused. */
      continue;
    }
    entry->command_buffer = nil;
  }
}
/* Tear down the pool: release every buffer it holds, subtracting each buffer's allocated size
 * from the running total, then empty the list. */
MetalBufferPool::~MetalBufferPool()
{
  thread_scoped_lock lock(buffer_mutex);

  for (auto entry = temp_buffers.begin(); entry != temp_buffers.end(); ++entry) {
    id<MTLBuffer> pooled_buffer = entry->buffer;
    total_temp_mem_size -= pooled_buffer.allocatedSize;
    [pooled_buffer release];
    /* Clear the entry so it no longer refers to the released buffer. */
    entry->buffer = nil;
  }

  temp_buffers.clear();
}
CCL_NAMESPACE_END
#endif /* WITH_METAL */