Vulkan: Swap to system memory for device local memory

This PR allows device local memory to be swapped to system RAM. It relies on
the VK_EXT_memory_priority and VK_EXT_pageable_device_local_memory
extensions being supported by the system.

Most platforms support these extensions.

Pull Request: https://projects.blender.org/blender/blender/pulls/144422
This commit is contained in:
Jeroen Bakker
2025-08-12 11:51:40 +02:00
parent d278e7d424
commit 42c3f35780
14 changed files with 75 additions and 12 deletions

View File

@@ -393,6 +393,22 @@ class GHOST_DeviceVK {
feature_struct_ptr.push_back(&fragment_shader_barycentric);
}
/* VK_EXT_memory_priority */
VkPhysicalDeviceMemoryPriorityFeaturesEXT memory_priority = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT, nullptr, VK_TRUE};
if (extension_enabled(VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME)) {
feature_struct_ptr.push_back(&memory_priority);
}
/* VK_EXT_pageable_device_local_memory */
VkPhysicalDevicePageableDeviceLocalMemoryFeaturesEXT pageable_device_local_memory = {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT,
nullptr,
VK_TRUE};
if (extension_enabled(VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME)) {
feature_struct_ptr.push_back(&pageable_device_local_memory);
}
/* Link all registered feature structs. */
for (int i = 1; i < feature_struct_ptr.size(); i++) {
((VkBaseInStructure *)(feature_struct_ptr[i - 1]))->pNext =
@@ -1291,6 +1307,8 @@ GHOST_TSuccess GHOST_ContextVK::initializeDrawingContext()
optional_device_extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
optional_device_extensions.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
optional_device_extensions.push_back(VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
optional_device_extensions.push_back(VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME);
optional_device_extensions.push_back(VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME);
VkInstance instance = VK_NULL_HANDLE;
if (!vulkan_device.has_value()) {

View File

@@ -409,6 +409,7 @@ void VKBackend::detect_workarounds(VKDevice &device)
extensions.dynamic_rendering_local_read = false;
extensions.dynamic_rendering_unused_attachments = false;
extensions.descriptor_buffer = false;
extensions.pageable_device_local_memory = false;
device.workarounds_ = workarounds;
device.extensions_ = extensions;
@@ -431,6 +432,9 @@ void VKBackend::detect_workarounds(VKDevice &device)
extensions.descriptor_buffer = device.supports_extension(
VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
#endif
extensions.memory_priority = device.supports_extension(VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME);
extensions.pageable_device_local_memory = device.supports_extension(
VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME);
#ifdef _WIN32
extensions.external_memory = device.supports_extension(
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME);

View File

@@ -25,6 +25,7 @@ bool VKBuffer::create(size_t size_in_bytes,
VkMemoryPropertyFlags required_flags,
VkMemoryPropertyFlags preferred_flags,
VmaAllocationCreateFlags allocation_flags,
float priority,
bool export_memory)
{
BLI_assert(!is_allocated());
@@ -60,7 +61,7 @@ bool VKBuffer::create(size_t size_in_bytes,
VmaAllocationCreateInfo vma_create_info = {};
vma_create_info.flags = allocation_flags;
vma_create_info.priority = 1.0f;
vma_create_info.priority = priority;
vma_create_info.requiredFlags = required_flags;
vma_create_info.preferredFlags = preferred_flags;
vma_create_info.usage = VMA_MEMORY_USAGE_AUTO;

View File

@@ -50,6 +50,7 @@ class VKBuffer : public NonCopyable {
VkMemoryPropertyFlags required_flags,
VkMemoryPropertyFlags preferred_flags,
VmaAllocationCreateFlags vma_allocation_flags,
float priority,
bool export_memory = false);
void clear(VKContext &context, uint32_t clear_value);
void update_immediately(const void *data) const;

View File

@@ -535,7 +535,8 @@ void VKDescriptorBufferUpdator::allocate_new_descriptor_set(
VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
0,
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT);
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT,
0.8f);
debug::object_label(buffer->vk_handle(), "DescriptorBuffer");
descriptor_buffer_data = static_cast<uint8_t *>(buffer->mapped_memory_get());
descriptor_buffer_device_address = buffer->device_address_get();

View File

@@ -42,6 +42,8 @@ void VKExtensions::log() const
" - [%c] dynamic rendering local read\n"
" - [%c] dynamic rendering unused attachments\n"
" - [%c] external memory\n"
" - [%c] memory priority\n"
" - [%c] pageable device local memory\n"
" - [%c] shader stencil export",
shader_output_viewport_index ? 'X' : ' ',
shader_output_layer ? 'X' : ' ',
@@ -50,6 +52,8 @@ void VKExtensions::log() const
dynamic_rendering_local_read ? 'X' : ' ',
dynamic_rendering_unused_attachments ? 'X' : ' ',
external_memory ? 'X' : ' ',
memory_priority ? 'X' : ' ',
pageable_device_local_memory ? 'X' : ' ',
GPU_stencil_export_support() ? 'X' : ' ');
}
@@ -262,6 +266,9 @@ void VKDevice::init_memory_allocator()
if (extensions_.descriptor_buffer) {
info.flags |= VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT;
}
if (extensions_.memory_priority) {
info.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT;
}
vmaCreateAllocator(&info, &mem_allocator_);
if (!extensions_.external_memory) {
@@ -308,6 +315,7 @@ void VKDevice::init_memory_allocator()
VmaPoolCreateInfo pool_create_info = {};
pool_create_info.memoryTypeIndex = memory_type_index;
pool_create_info.pMemoryAllocateNext = &vma_pools.external_memory_info;
pool_create_info.priority = 1.0f;
vmaCreatePool(mem_allocator_, &pool_create_info, &vma_pools.external_memory);
}
@@ -317,7 +325,8 @@ void VKDevice::init_dummy_buffer()
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VkMemoryPropertyFlags(0),
VmaAllocationCreateFlags(0));
VmaAllocationCreateFlags(0),
1.0f);
debug::object_label(dummy_buffer.vk_handle(), "DummyBuffer");
/* Default dummy buffer. Set the 4th element to 1 to fix missing orcos. */
float data[16] = {

View File

@@ -65,6 +65,16 @@ struct VKExtensions {
*/
bool logic_ops = false;
/**
* Does the device support VK_EXT_memory_priority
*/
bool memory_priority = false;
/**
* Does the device support VK_EXT_pageable_device_local_memory
*/
bool pageable_device_local_memory = false;
/** Log enabled features and extensions. */
void log() const;
};

View File

@@ -145,7 +145,8 @@ VKBuffer &VKImmediate::ensure_space(VkDeviceSize bytes_needed, VkDeviceSize offs
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VMA_ALLOCATION_CREATE_MAPPED_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT);
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
0.8);
debug::object_label(result.vk_handle(), "Immediate");
return result;

View File

@@ -113,7 +113,8 @@ void VKIndexBuffer::allocate()
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VkMemoryPropertyFlags(0),
VmaAllocationCreateFlags(0));
VmaAllocationCreateFlags(0),
0.8f);
debug::object_label(buffer_.vk_handle(), "IndexBuffer");
}

View File

@@ -33,7 +33,8 @@ VKStagingBuffer::VKStagingBuffer(const VKBuffer &device_buffer,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VMA_ALLOCATION_CREATE_MAPPED_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT);
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
0.4f);
debug::object_label(host_buffer_.vk_handle(), "StagingBuffer");
}

View File

@@ -69,7 +69,8 @@ void VKStorageBuffer::allocate()
buffer_usage_flags,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VkMemoryPropertyFlags(0),
VmaAllocationCreateFlags(0));
VmaAllocationCreateFlags(0),
0.8f);
BLI_assert(buffer_.is_allocated());
debug::object_label(buffer_.vk_handle(), name_);
}

View File

@@ -200,7 +200,8 @@ void VKTexture::read_sub(
/* Although we are only reading, we need to set the host access random bit
* to improve the performance on AMD GPUs. */
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT);
VMA_ALLOCATION_CREATE_MAPPED_BIT,
0.2f);
render_graph::VKCopyImageToBufferNode::CreateInfo copy_image_to_buffer = {};
render_graph::VKCopyImageToBufferNode::Data &node_data = copy_image_to_buffer.node_data;
@@ -322,7 +323,8 @@ void VKTexture::update_sub(int mip,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VMA_ALLOCATION_CREATE_MAPPED_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT);
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
0.4f);
vk_buffer = staging_buffer.vk_handle();
/* Rows are sequentially stored, when unpack row length is 0, or equal to the extent width. In
* other cases we unpack the rows to reduce the size of the staging buffer and data transfer.
@@ -549,6 +551,17 @@ static VkImageCreateFlags to_vk_image_create(const eGPUTextureType texture_type,
return result;
}
/**
 * Pick the allocation priority for a texture from its usage flags.
 *
 * The export flag is tested first, so a texture that is both exported and used as an attachment
 * receives the export priority.
 *
 * \param texture_usage: Usage flags of the texture being allocated.
 * \return 0.8 for textures with #GPU_TEXTURE_USAGE_MEMORY_EXPORT, 1.0 for textures with
 * #GPU_TEXTURE_USAGE_ATTACHMENT, and 0.5 for everything else.
 */
static float memory_priority(const eGPUTextureUsage texture_usage)
{
  const bool is_exported = bool(texture_usage & GPU_TEXTURE_USAGE_MEMORY_EXPORT);
  const bool is_attachment = bool(texture_usage & GPU_TEXTURE_USAGE_ATTACHMENT);

  if (is_exported) {
    return 0.8f;
  }
  return is_attachment ? 1.0f : 0.5f;
}
bool VKTexture::allocate()
{
BLI_assert(vk_image_ == VK_NULL_HANDLE);
@@ -603,7 +616,7 @@ bool VKTexture::allocate()
VmaAllocationCreateInfo allocCreateInfo = {};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.priority = 1.0f;
allocCreateInfo.priority = memory_priority(texture_usage);
if (bool(texture_usage & GPU_TEXTURE_USAGE_MEMORY_EXPORT)) {
image_info.pNext = &external_memory_create_info;

View File

@@ -41,7 +41,8 @@ void VKUniformBuffer::allocate()
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
VmaAllocationCreateFlags(0));
VmaAllocationCreateFlags(0),
0.8f);
debug::object_label(buffer_.vk_handle(), name_);
}

View File

@@ -210,7 +210,8 @@ void VKVertexBuffer::allocate()
vk_buffer_usage,
0,
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
VmaAllocationCreateFlags(0));
VmaAllocationCreateFlags(0),
0.8f);
debug::object_label(buffer_.vk_handle(), "VertexBuffer");
}