Vulkan: Swap to system memory for device local memory
This PR allows device-local memory to be swapped out to system RAM. It relies on the VK_EXT_memory_priority and VK_EXT_pageable_device_local_memory extensions being supported by the system. Most platforms support these extensions. Pull Request: https://projects.blender.org/blender/blender/pulls/144422
This commit is contained in:
@@ -393,6 +393,22 @@ class GHOST_DeviceVK {
|
||||
feature_struct_ptr.push_back(&fragment_shader_barycentric);
|
||||
}
|
||||
|
||||
/* VK_EXT_memory_priority */
|
||||
VkPhysicalDeviceMemoryPriorityFeaturesEXT memory_priority = {
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT, nullptr, VK_TRUE};
|
||||
if (extension_enabled(VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME)) {
|
||||
feature_struct_ptr.push_back(&memory_priority);
|
||||
}
|
||||
|
||||
/* VK_EXT_pageable_device_local_memory */
|
||||
VkPhysicalDevicePageableDeviceLocalMemoryFeaturesEXT pageable_device_local_memory = {
|
||||
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT,
|
||||
nullptr,
|
||||
VK_TRUE};
|
||||
if (extension_enabled(VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME)) {
|
||||
feature_struct_ptr.push_back(&pageable_device_local_memory);
|
||||
}
|
||||
|
||||
/* Link all registered feature structs. */
|
||||
for (int i = 1; i < feature_struct_ptr.size(); i++) {
|
||||
((VkBaseInStructure *)(feature_struct_ptr[i - 1]))->pNext =
|
||||
@@ -1291,6 +1307,8 @@ GHOST_TSuccess GHOST_ContextVK::initializeDrawingContext()
|
||||
optional_device_extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||
optional_device_extensions.push_back(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME);
|
||||
optional_device_extensions.push_back(VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
|
||||
optional_device_extensions.push_back(VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME);
|
||||
optional_device_extensions.push_back(VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME);
|
||||
|
||||
VkInstance instance = VK_NULL_HANDLE;
|
||||
if (!vulkan_device.has_value()) {
|
||||
|
||||
@@ -409,6 +409,7 @@ void VKBackend::detect_workarounds(VKDevice &device)
|
||||
extensions.dynamic_rendering_local_read = false;
|
||||
extensions.dynamic_rendering_unused_attachments = false;
|
||||
extensions.descriptor_buffer = false;
|
||||
extensions.pageable_device_local_memory = false;
|
||||
|
||||
device.workarounds_ = workarounds;
|
||||
device.extensions_ = extensions;
|
||||
@@ -431,6 +432,9 @@ void VKBackend::detect_workarounds(VKDevice &device)
|
||||
extensions.descriptor_buffer = device.supports_extension(
|
||||
VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME);
|
||||
#endif
|
||||
extensions.memory_priority = device.supports_extension(VK_EXT_MEMORY_PRIORITY_EXTENSION_NAME);
|
||||
extensions.pageable_device_local_memory = device.supports_extension(
|
||||
VK_EXT_PAGEABLE_DEVICE_LOCAL_MEMORY_EXTENSION_NAME);
|
||||
#ifdef _WIN32
|
||||
extensions.external_memory = device.supports_extension(
|
||||
VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME);
|
||||
|
||||
@@ -25,6 +25,7 @@ bool VKBuffer::create(size_t size_in_bytes,
|
||||
VkMemoryPropertyFlags required_flags,
|
||||
VkMemoryPropertyFlags preferred_flags,
|
||||
VmaAllocationCreateFlags allocation_flags,
|
||||
float priority,
|
||||
bool export_memory)
|
||||
{
|
||||
BLI_assert(!is_allocated());
|
||||
@@ -60,7 +61,7 @@ bool VKBuffer::create(size_t size_in_bytes,
|
||||
|
||||
VmaAllocationCreateInfo vma_create_info = {};
|
||||
vma_create_info.flags = allocation_flags;
|
||||
vma_create_info.priority = 1.0f;
|
||||
vma_create_info.priority = priority;
|
||||
vma_create_info.requiredFlags = required_flags;
|
||||
vma_create_info.preferredFlags = preferred_flags;
|
||||
vma_create_info.usage = VMA_MEMORY_USAGE_AUTO;
|
||||
|
||||
@@ -50,6 +50,7 @@ class VKBuffer : public NonCopyable {
|
||||
VkMemoryPropertyFlags required_flags,
|
||||
VkMemoryPropertyFlags preferred_flags,
|
||||
VmaAllocationCreateFlags vma_allocation_flags,
|
||||
float priority,
|
||||
bool export_memory = false);
|
||||
void clear(VKContext &context, uint32_t clear_value);
|
||||
void update_immediately(const void *data) const;
|
||||
|
||||
@@ -535,7 +535,8 @@ void VKDescriptorBufferUpdator::allocate_new_descriptor_set(
|
||||
VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
0,
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT);
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT,
|
||||
0.8f);
|
||||
debug::object_label(buffer->vk_handle(), "DescriptorBuffer");
|
||||
descriptor_buffer_data = static_cast<uint8_t *>(buffer->mapped_memory_get());
|
||||
descriptor_buffer_device_address = buffer->device_address_get();
|
||||
|
||||
@@ -42,6 +42,8 @@ void VKExtensions::log() const
|
||||
" - [%c] dynamic rendering local read\n"
|
||||
" - [%c] dynamic rendering unused attachments\n"
|
||||
" - [%c] external memory\n"
|
||||
" - [%c] memory priority\n"
|
||||
" - [%c] pageable device local memory\n"
|
||||
" - [%c] shader stencil export",
|
||||
shader_output_viewport_index ? 'X' : ' ',
|
||||
shader_output_layer ? 'X' : ' ',
|
||||
@@ -50,6 +52,8 @@ void VKExtensions::log() const
|
||||
dynamic_rendering_local_read ? 'X' : ' ',
|
||||
dynamic_rendering_unused_attachments ? 'X' : ' ',
|
||||
external_memory ? 'X' : ' ',
|
||||
memory_priority ? 'X' : ' ',
|
||||
pageable_device_local_memory ? 'X' : ' ',
|
||||
GPU_stencil_export_support() ? 'X' : ' ');
|
||||
}
|
||||
|
||||
@@ -262,6 +266,9 @@ void VKDevice::init_memory_allocator()
|
||||
if (extensions_.descriptor_buffer) {
|
||||
info.flags |= VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT;
|
||||
}
|
||||
if (extensions_.memory_priority) {
|
||||
info.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT;
|
||||
}
|
||||
vmaCreateAllocator(&info, &mem_allocator_);
|
||||
|
||||
if (!extensions_.external_memory) {
|
||||
@@ -308,6 +315,7 @@ void VKDevice::init_memory_allocator()
|
||||
VmaPoolCreateInfo pool_create_info = {};
|
||||
pool_create_info.memoryTypeIndex = memory_type_index;
|
||||
pool_create_info.pMemoryAllocateNext = &vma_pools.external_memory_info;
|
||||
pool_create_info.priority = 1.0f;
|
||||
vmaCreatePool(mem_allocator_, &pool_create_info, &vma_pools.external_memory);
|
||||
}
|
||||
|
||||
@@ -317,7 +325,8 @@ void VKDevice::init_dummy_buffer()
|
||||
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VkMemoryPropertyFlags(0),
|
||||
VmaAllocationCreateFlags(0));
|
||||
VmaAllocationCreateFlags(0),
|
||||
1.0f);
|
||||
debug::object_label(dummy_buffer.vk_handle(), "DummyBuffer");
|
||||
/* Default dummy buffer. Set the 4th element to 1 to fix missing orcos. */
|
||||
float data[16] = {
|
||||
|
||||
@@ -65,6 +65,16 @@ struct VKExtensions {
|
||||
*/
|
||||
bool logic_ops = false;
|
||||
|
||||
/**
|
||||
* Does the device support VK_EXT_memory_priority
|
||||
*/
|
||||
bool memory_priority = false;
|
||||
|
||||
/**
|
||||
* Does the device support VK_EXT_pageable_device_local_memory
|
||||
*/
|
||||
bool pageable_device_local_memory = false;
|
||||
|
||||
/** Log enabled features and extensions. */
|
||||
void log() const;
|
||||
};
|
||||
|
||||
@@ -145,7 +145,8 @@ VKBuffer &VKImmediate::ensure_space(VkDeviceSize bytes_needed, VkDeviceSize offs
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VMA_ALLOCATION_CREATE_MAPPED_BIT |
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT);
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
|
||||
0.8);
|
||||
debug::object_label(result.vk_handle(), "Immediate");
|
||||
|
||||
return result;
|
||||
|
||||
@@ -113,7 +113,8 @@ void VKIndexBuffer::allocate()
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VkMemoryPropertyFlags(0),
|
||||
VmaAllocationCreateFlags(0));
|
||||
VmaAllocationCreateFlags(0),
|
||||
0.8f);
|
||||
debug::object_label(buffer_.vk_handle(), "IndexBuffer");
|
||||
}
|
||||
|
||||
|
||||
@@ -33,7 +33,8 @@ VKStagingBuffer::VKStagingBuffer(const VKBuffer &device_buffer,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VMA_ALLOCATION_CREATE_MAPPED_BIT |
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT);
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
|
||||
0.4f);
|
||||
debug::object_label(host_buffer_.vk_handle(), "StagingBuffer");
|
||||
}
|
||||
|
||||
|
||||
@@ -69,7 +69,8 @@ void VKStorageBuffer::allocate()
|
||||
buffer_usage_flags,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VkMemoryPropertyFlags(0),
|
||||
VmaAllocationCreateFlags(0));
|
||||
VmaAllocationCreateFlags(0),
|
||||
0.8f);
|
||||
BLI_assert(buffer_.is_allocated());
|
||||
debug::object_label(buffer_.vk_handle(), name_);
|
||||
}
|
||||
|
||||
@@ -200,7 +200,8 @@ void VKTexture::read_sub(
|
||||
/* Although we are only reading, we need to set the host access random bit
|
||||
* to improve the performance on AMD GPUs. */
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT |
|
||||
VMA_ALLOCATION_CREATE_MAPPED_BIT);
|
||||
VMA_ALLOCATION_CREATE_MAPPED_BIT,
|
||||
0.2f);
|
||||
|
||||
render_graph::VKCopyImageToBufferNode::CreateInfo copy_image_to_buffer = {};
|
||||
render_graph::VKCopyImageToBufferNode::Data &node_data = copy_image_to_buffer.node_data;
|
||||
@@ -322,7 +323,8 @@ void VKTexture::update_sub(int mip,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VMA_ALLOCATION_CREATE_MAPPED_BIT |
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT);
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT,
|
||||
0.4f);
|
||||
vk_buffer = staging_buffer.vk_handle();
|
||||
/* Rows are sequentially stored, when unpack row length is 0, or equal to the extent width. In
|
||||
* other cases we unpack the rows to reduce the size of the staging buffer and data transfer.
|
||||
@@ -549,6 +551,17 @@ static VkImageCreateFlags to_vk_image_create(const eGPUTextureType texture_type,
|
||||
return result;
|
||||
}
|
||||
|
||||
static float memory_priority(const eGPUTextureUsage texture_usage)
|
||||
{
|
||||
if (bool(texture_usage & GPU_TEXTURE_USAGE_MEMORY_EXPORT)) {
|
||||
return 0.8f;
|
||||
}
|
||||
if (bool(texture_usage & GPU_TEXTURE_USAGE_ATTACHMENT)) {
|
||||
return 1.0f;
|
||||
}
|
||||
return 0.5f;
|
||||
}
|
||||
|
||||
bool VKTexture::allocate()
|
||||
{
|
||||
BLI_assert(vk_image_ == VK_NULL_HANDLE);
|
||||
@@ -603,7 +616,7 @@ bool VKTexture::allocate()
|
||||
|
||||
VmaAllocationCreateInfo allocCreateInfo = {};
|
||||
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
|
||||
allocCreateInfo.priority = 1.0f;
|
||||
allocCreateInfo.priority = memory_priority(texture_usage);
|
||||
|
||||
if (bool(texture_usage & GPU_TEXTURE_USAGE_MEMORY_EXPORT)) {
|
||||
image_info.pNext = &external_memory_create_info;
|
||||
|
||||
@@ -41,7 +41,8 @@ void VKUniformBuffer::allocate()
|
||||
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
|
||||
VmaAllocationCreateFlags(0));
|
||||
VmaAllocationCreateFlags(0),
|
||||
0.8f);
|
||||
debug::object_label(buffer_.vk_handle(), name_);
|
||||
}
|
||||
|
||||
|
||||
@@ -210,7 +210,8 @@ void VKVertexBuffer::allocate()
|
||||
vk_buffer_usage,
|
||||
0,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
VmaAllocationCreateFlags(0));
|
||||
VmaAllocationCreateFlags(0),
|
||||
0.8f);
|
||||
debug::object_label(buffer_.vk_handle(), "VertexBuffer");
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user