This PR removes a bunch of dead code following #123551 (removal of AMD and Intel GPU support). It is safe to assume that UMA will be available, so a lot of codepaths that dealt with copying between CPU and GPU are now just clutter. Pull Request: https://projects.blender.org/blender/blender/pulls/136146
183 lines
5.4 KiB
Plaintext
183 lines
5.4 KiB
Plaintext
/* SPDX-FileCopyrightText: 2021-2022 Blender Foundation
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
|
|
|
#ifdef WITH_METAL
|
|
|
|
# include "device/metal/util.h"
|
|
# include "device/metal/device_impl.h"
|
|
# include "util/md5.h"
|
|
# include "util/path.h"
|
|
# include "util/string.h"
|
|
# include "util/time.h"
|
|
|
|
# include <IOKit/IOKitLib.h>
|
|
# include <ctime>
|
|
# include <pwd.h>
|
|
# include <sys/shm.h>
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
/* Build the user-visible name for `device`: the Metal device name followed by a "(GPU ...)"
 * suffix. */
string MetalInfo::get_device_name(id<MTLDevice> device)
{
  /* The core count is part of the suffix so that GPU variants can be told apart in benchmarks.
   * A count of 0 selects the plain " (GPU)" suffix instead. */
  const int num_cores = get_apple_gpu_core_count(device);

  return string([device.name UTF8String]) +
         string_printf(num_cores ? " (GPU - %d cores)" : " (GPU)", num_cores);
}
|
|
|
|
/* Query the IORegistry for the "gpu-core-count" property of the service backing `device`.
 *
 * Returns the core count, or 0 when the lookup is not attempted (macOS older than 12.0), no
 * matching service is found, or the property is missing or not a number. */
int MetalInfo::get_apple_gpu_core_count(id<MTLDevice> device)
{
  int core_count = 0;
  if (@available(macos 12.0, *)) {
    /* The matching dictionary is consumed by IOServiceGetMatchingService(), but the returned
     * service is owned by us and has to be released once we are done with it. */
    io_service_t gpu_service = IOServiceGetMatchingService(
        kIOMainPortDefault, IORegistryEntryIDMatching(device.registryID));
    if (gpu_service != IO_OBJECT_NULL) {
      if (CFNumberRef numberRef = (CFNumberRef)IORegistryEntryCreateCFProperty(
              gpu_service, CFSTR("gpu-core-count"), nullptr, 0))
      {
        /* Only trust the value if the property really is a CFNumber. */
        if (CFGetTypeID(numberRef) == CFNumberGetTypeID()) {
          CFNumberGetValue(numberRef, kCFNumberSInt32Type, &core_count);
        }
        CFRelease(numberRef);
      }
      IOObjectRelease(gpu_service);
    }
  }
  return core_count;
}
|
|
|
|
/* Classify `device` by looking for a chip generation token in its Metal device name.
 *
 * Tokens are tested in the order "M1", "M2", "M3"; the first one found decides the result.
 * Names containing none of them map to APPLE_UNKNOWN. */
AppleGPUArchitecture MetalInfo::get_apple_gpu_architecture(id<MTLDevice> device)
{
  const char *name = [device.name UTF8String];
  const auto name_contains = [name](const char *token) -> bool {
    return strstr(name, token) != nullptr;
  };

  if (name_contains("M1")) {
    return APPLE_M1;
  }
  if (name_contains("M2")) {
    /* M2 parts with more than 10 GPU cores are reported as the "big" variant. */
    const bool is_small_variant = get_apple_gpu_core_count(device) <= 10;
    return is_small_variant ? APPLE_M2 : APPLE_M2_BIG;
  }
  if (name_contains("M3")) {
    return APPLE_M3;
  }
  return APPLE_UNKNOWN;
}
|
|
|
|
/* Number of elements per partition used when sorting active indices by material.
 *
 * The CYCLES_METAL_SORT_PARTITION_ELEMENTS environment variable, when set, overrides the
 * default; its value is parsed with atoi(). */
int MetalInfo::optimal_sort_partition_elements()
{
  const char *override_value = getenv("CYCLES_METAL_SORT_PARTITION_ELEMENTS");
  if (override_value != nullptr) {
    return atoi(override_value);
  }

  /* Default: partitioning the active indices into chunks of 65536 elements before sorting each
   * chunk by material gives better cache utilization on M1 and M2 GPUs, with an overall render
   * time speedup of up to 15%. */
  return 65536;
}
|
|
|
|
/* Return the list of Metal devices that Cycles can render on.
 *
 * The system is enumerated on the first call only; the result is cached in function-local
 * statics and returned as-is on every later call. A device is considered usable when running
 * on macOS 12.2 or newer, its name contains "Apple" (and neither "Intel" nor "AMD"), and it
 * reports unified memory. Every usable device is retained before being stored in the list. */
const vector<id<MTLDevice>> &MetalInfo::get_usable_devices()
{
  static vector<id<MTLDevice>> usable_devices;
  static bool already_enumerated = false;

  if (already_enumerated) {
    return usable_devices;
  }

  metal_printf("Usable Metal devices:\n");

  /* MTLCopyAllDevices() hands us ownership of the returned array, so keep a reference to it and
   * release it once enumeration is finished. Devices we keep are retained individually below. */
  NSArray<id<MTLDevice>> *all_devices = MTLCopyAllDevices();
  for (id<MTLDevice> device in all_devices) {
    string device_name = get_device_name(device);
    bool usable = false;

    if (@available(macos 12.2, *)) {
      const char *device_name_char = [device.name UTF8String];
      if (!(strstr(device_name_char, "Intel") || strstr(device_name_char, "AMD")) &&
          strstr(device_name_char, "Apple"))
      {
        /* TODO: Implement a better way to identify device vendor instead of relying on name. */
        /* We only support Apple Silicon GPUs which all have unified memory, but explicitly check
         * just in case it ever changes. */
        usable = [device hasUnifiedMemory];
      }
    }

    if (usable) {
      metal_printf("- %s\n", device_name.c_str());
      /* The cached list outlives the enumeration array, so it needs its own reference. */
      [device retain];
      usable_devices.push_back(device);
    }
    else {
      metal_printf("  (skipping \"%s\")\n", device_name.c_str());
    }
  }
  [all_devices release];

  if (usable_devices.empty()) {
    metal_printf("   No usable Metal devices found\n");
  }
  already_enumerated = true;

  return usable_devices;
}
|
|
|
|
/* Hand out a temporary buffer of exactly `length` bytes, tagged as in use by `command_buffer`.
 *
 * An idle pool entry of the same length is reused when available; otherwise a new shared-storage
 * buffer is created on `device`, accounted for in `stats` and added to the pool. When `pointer`
 * is non-null, `length` bytes are copied from it into the buffer contents before returning. */
id<MTLBuffer> MetalBufferPool::get_buffer(id<MTLDevice> device,
                                          id<MTLCommandBuffer> command_buffer,
                                          NSUInteger length,
                                          const void *pointer,
                                          Stats &stats)
{
  id<MTLBuffer> result = nil;
  {
    thread_scoped_lock lock(buffer_mutex);

    /* An entry is idle when no command buffer is attached to it. Only the length is matched. */
    for (MetalBufferListEntry &entry : temp_buffers) {
      if (entry.command_buffer != nil || entry.buffer.length != length) {
        continue;
      }
      result = entry.buffer;
      entry.command_buffer = command_buffer;
      break;
    }

    if (result == nil) {
      /* Nothing to reuse: grow the pool. Typically it only ever holds a handful of entries. */
      result = [device newBufferWithLength:length options:MTLResourceStorageModeShared];

      const NSUInteger allocated_size = result.allocatedSize;
      stats.mem_alloc(allocated_size);
      total_temp_mem_size += allocated_size;

      temp_buffers.push_back(MetalBufferListEntry{result, command_buffer});
    }
  }

  /* The upload happens outside of the lock. */
  if (pointer != nullptr) {
    memcpy(result.contents, pointer, length);
  }

  return result;
}
|
|
|
|
/* Make every pool entry that was handed out for `command_buffer` available for reuse again by
 * clearing its command buffer association. */
void MetalBufferPool::process_command_buffer_completion(id<MTLCommandBuffer> command_buffer)
{
  assert(command_buffer);

  thread_scoped_lock lock(buffer_mutex);
  for (MetalBufferListEntry &entry : temp_buffers) {
    if (entry.command_buffer != command_buffer) {
      /* Belongs to another command buffer, or is already idle. */
      continue;
    }
    entry.command_buffer = nil;
  }
}
|
|
|
|
/* Tear down the pool: release every pooled buffer, subtract its size from the running total and
 * empty the entry list. */
MetalBufferPool::~MetalBufferPool()
{
  thread_scoped_lock lock(buffer_mutex);

  /* All entries are released here, whether or not a command buffer is still attached. */
  for (MetalBufferListEntry &entry : temp_buffers) {
    id<MTLBuffer> pooled = entry.buffer;
    total_temp_mem_size -= pooled.allocatedSize;
    [pooled release];
    entry.buffer = nil;
  }
  temp_buffers.clear();
}
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
#endif /* WITH_METAL */
|