This PR moves the ownership of the Vulkan memory allocator from gpu/device to GHOST/context/device. In the future this will allow a cleaner control flow between OpenXR and Vulkan. Currently, because the ownership is in the GPU module, many objects would need to be passed from GHOST to the GPU module to be shared between the two allocators. Moving both allocators (OpenXR/Context) to GHOST would reduce the complexity. Pull Request: https://projects.blender.org/blender/blender/pulls/146819
494 lines
15 KiB
C++
494 lines
15 KiB
C++
/* SPDX-FileCopyrightText: 2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 */
|
|
|
|
#pragma once
|
|
|
|
#include <atomic>
|
|
|
|
#include "BLI_task.h"
|
|
#include "BLI_threads.h"
|
|
#include "BLI_utility_mixins.hh"
|
|
#include "BLI_vector.hh"
|
|
|
|
#include "render_graph/vk_render_graph.hh"
|
|
#include "render_graph/vk_resource_state_tracker.hh"
|
|
#include "vk_buffer.hh"
|
|
#include "vk_common.hh"
|
|
#include "vk_debug.hh"
|
|
#include "vk_descriptor_pools.hh"
|
|
#include "vk_descriptor_set_layouts.hh"
|
|
#include "vk_memory_pool.hh"
|
|
#include "vk_pipeline_pool.hh"
|
|
#include "vk_resource_pool.hh"
|
|
#include "vk_samplers.hh"
|
|
|
|
namespace blender::gpu {
|
|
class VKBackend;
|
|
|
|
/**
 * Optional device features and extensions.
 *
 * Every flag defaults to `false`.
 */
struct VKExtensions {
  /** Does the device support VkPhysicalDeviceVulkan12Features::shaderOutputViewportIndex. */
  bool shader_output_viewport_index = false;
  /** Does the device support VkPhysicalDeviceVulkan12Features::shaderOutputLayer. */
  bool shader_output_layer = false;
  /**
   * Does the device support
   * VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR::fragmentShaderBarycentric.
   */
  bool fragment_shader_barycentric = false;

  /**
   * Does the device support wide line rendering
   * (VkPhysicalDeviceFeatures::wideLines).
   */
  bool wide_lines = false;

  /**
   * Does the device support VK_KHR_dynamic_rendering_local_read.
   */
  bool dynamic_rendering_local_read = false;

  /**
   * Does the device support VK_EXT_dynamic_rendering_unused_attachments.
   */
  bool dynamic_rendering_unused_attachments = false;

  /**
   * Does the device support VK_KHR_external_memory_win32/VK_KHR_external_memory_fd.
   */
  bool external_memory = false;

  /** Does the device support VK_KHR_maintenance4. */
  bool maintenance4 = false;

  /**
   * Does the device support VK_EXT_descriptor_buffer.
   */
  bool descriptor_buffer = false;

  /**
   * Does the device support logic ops.
   */
  bool logic_ops = false;

  /**
   * Does the device support VK_EXT_memory_priority.
   */
  bool memory_priority = false;

  /**
   * Does the device support VK_EXT_pageable_device_local_memory.
   */
  bool pageable_device_local_memory = false;

  /** Log enabled features and extensions. */
  void log() const;
};
|
|
|
|
/* TODO: Split into VKWorkarounds and VKExtensions to remove the negating when an extension isn't
|
|
* supported. */
|
|
/**
 * Workarounds for missing device capabilities.
 *
 * A flag that is `true` means the workaround has to be applied. Every flag defaults to `false`.
 */
struct VKWorkarounds {
  /**
   * Some devices don't support pixel formats that are aligned to 24 and 48 bits.
   * In this case we need to use a different texture format.
   *
   * If set to true we should work around this issue by using a different texture format.
   */
  bool not_aligned_pixel_formats = false;

  /** Workarounds that apply to vertex buffer formats. */
  struct {
    /**
     * Is the workaround enabled for devices that don't support using VK_FORMAT_R8G8B8_* as vertex
     * buffer.
     */
    bool r8g8b8 = false;
  } vertex_formats;
};
|
|
|
|
/**
|
|
* Shared resources between contexts that run in the same thread.
|
|
*/
|
|
class VKThreadData : public NonCopyable, NonMovable {
|
|
public:
|
|
/** Thread ID this instance belongs to. */
|
|
pthread_t thread_id;
|
|
VKDescriptorPools descriptor_pools;
|
|
VKDescriptorSetTracker descriptor_set;
|
|
|
|
/**
|
|
* The current rendering depth.
|
|
*
|
|
* GPU_rendering_begin can be called multiple times forming a hierarchy. The same resource pool
|
|
* should be used for the whole hierarchy. rendering_depth is increased for every
|
|
* GPU_rendering_begin and decreased when GPU_rendering_end is called. Resources pools are cycled
|
|
* when the rendering_depth set to 0.
|
|
*/
|
|
int32_t rendering_depth = 0;
|
|
|
|
VKThreadData(VKDevice &device, pthread_t thread_id);
|
|
};
|
|
|
|
class VKDevice : public NonCopyable {
|
|
private:
|
|
/** Copies of the handles owned by the GHOST context. */
|
|
VkInstance vk_instance_ = VK_NULL_HANDLE;
|
|
VkPhysicalDevice vk_physical_device_ = VK_NULL_HANDLE;
|
|
VkDevice vk_device_ = VK_NULL_HANDLE;
|
|
uint32_t vk_queue_family_ = 0;
|
|
VkQueue vk_queue_ = VK_NULL_HANDLE;
|
|
std::mutex *queue_mutex_ = nullptr;
|
|
|
|
bool is_initialized_ = false;
|
|
|
|
/**
|
|
* Task pool for render graph submission.
|
|
*
|
|
* Multiple threads in Blender can build a render graph. Building the command buffer for a render
|
|
* graph is faster when doing it in serial. Submission pool ensures that only one task is
|
|
* building at a time (background_serial).
|
|
*/
|
|
TaskPool *submission_pool_ = nullptr;
|
|
/**
|
|
* All created render graphs.
|
|
*/
|
|
Vector<render_graph::VKRenderGraph *> render_graphs_;
|
|
ThreadQueue *submitted_render_graphs_ = nullptr;
|
|
ThreadQueue *unused_render_graphs_ = nullptr;
|
|
VkSemaphore vk_timeline_semaphore_ = VK_NULL_HANDLE;
|
|
/**
|
|
* Last used timeline value.
|
|
*
|
|
* Must be externally synced by orphaned_data.mutex_get()
|
|
*/
|
|
TimelineValue timeline_value_ = 0;
|
|
|
|
VKSamplers samplers_;
|
|
VKDescriptorSetLayouts descriptor_set_layouts_;
|
|
|
|
/**
|
|
* Available Contexts for this device.
|
|
*
|
|
* Device keeps track of each contexts. When buffers/images are freed they need to be removed
|
|
* from all contexts state managers.
|
|
*
|
|
* The contexts inside this list aren't owned by the VKDevice. Caller of `GPU_context_create`
|
|
* holds the ownership.
|
|
*/
|
|
Vector<std::reference_wrapper<VKContext>> contexts_;
|
|
|
|
/** Allocator used for texture and buffers and other resources. */
|
|
VmaAllocator mem_allocator_ = VK_NULL_HANDLE;
|
|
|
|
/** Limits of the device linked to this context. */
|
|
VkPhysicalDeviceProperties vk_physical_device_properties_ = {};
|
|
VkPhysicalDeviceDriverProperties vk_physical_device_driver_properties_ = {};
|
|
VkPhysicalDeviceIDProperties vk_physical_device_id_properties_ = {};
|
|
VkPhysicalDeviceMemoryProperties vk_physical_device_memory_properties_ = {};
|
|
VkPhysicalDeviceMaintenance4Properties vk_physical_device_maintenance4_properties_ = {
|
|
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES};
|
|
VkPhysicalDeviceDescriptorBufferPropertiesEXT vk_physical_device_descriptor_buffer_properties_ =
|
|
{};
|
|
/** Features support. */
|
|
VkPhysicalDeviceFeatures vk_physical_device_features_ = {};
|
|
VkPhysicalDeviceVulkan11Features vk_physical_device_vulkan_11_features_ = {};
|
|
VkPhysicalDeviceVulkan12Features vk_physical_device_vulkan_12_features_ = {};
|
|
Array<VkExtensionProperties> device_extensions_;
|
|
|
|
/** Functions of vk_ext_debugutils for this device/instance. */
|
|
debug::VKDebuggingTools debugging_tools_;
|
|
|
|
/* Workarounds */
|
|
VKWorkarounds workarounds_;
|
|
VKExtensions extensions_;
|
|
|
|
std::string glsl_vert_patch_;
|
|
std::string glsl_geom_patch_;
|
|
std::string glsl_frag_patch_;
|
|
std::string glsl_comp_patch_;
|
|
Vector<VKThreadData *> thread_data_;
|
|
|
|
Shader *vk_backbuffer_blit_sh_ = nullptr;
|
|
|
|
public:
|
|
render_graph::VKResourceStateTracker resources;
|
|
VKDiscardPool orphaned_data;
|
|
/** Discard pool for resources that could still be used during rendering. */
|
|
VKDiscardPool orphaned_data_render;
|
|
VKPipelinePool pipelines;
|
|
/** Buffer to bind to unbound resource locations. */
|
|
VKBuffer dummy_buffer;
|
|
|
|
/**
|
|
* This struct contains the functions pointer to extension provided functions.
|
|
*/
|
|
struct {
|
|
/* Extension: VK_KHR_dynamic_rendering */
|
|
PFN_vkCmdBeginRendering vkCmdBeginRendering = nullptr;
|
|
PFN_vkCmdEndRendering vkCmdEndRendering = nullptr;
|
|
|
|
/* Extension: VK_EXT_debug_utils */
|
|
PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabel = nullptr;
|
|
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabel = nullptr;
|
|
PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectName = nullptr;
|
|
PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessenger = nullptr;
|
|
PFN_vkDestroyDebugUtilsMessengerEXT vkDestroyDebugUtilsMessenger = nullptr;
|
|
|
|
/* Extension: VK_KHR_external_memory_fd */
|
|
PFN_vkGetMemoryFdKHR vkGetMemoryFd = nullptr;
|
|
|
|
#ifdef _WIN32
|
|
/* Extension: VK_KHR_external_memory_win32 */
|
|
PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32Handle = nullptr;
|
|
#endif
|
|
|
|
/* Extension: VK_EXT_descriptor_buffer */
|
|
PFN_vkGetDescriptorSetLayoutSizeEXT vkGetDescriptorSetLayoutSize = nullptr;
|
|
PFN_vkGetDescriptorSetLayoutBindingOffsetEXT vkGetDescriptorSetLayoutBindingOffset = nullptr;
|
|
PFN_vkGetDescriptorEXT vkGetDescriptor = nullptr;
|
|
PFN_vkCmdBindDescriptorBuffersEXT vkCmdBindDescriptorBuffers = nullptr;
|
|
PFN_vkCmdSetDescriptorBufferOffsetsEXT vkCmdSetDescriptorBufferOffsets = nullptr;
|
|
|
|
} functions;
|
|
|
|
VKMemoryPools vma_pools;
|
|
|
|
const char *extension_name_get(int index) const
|
|
{
|
|
return device_extensions_[index].extensionName;
|
|
}
|
|
|
|
VkPhysicalDevice physical_device_get() const
|
|
{
|
|
return vk_physical_device_;
|
|
}
|
|
|
|
const VkPhysicalDeviceProperties &physical_device_properties_get() const
|
|
{
|
|
return vk_physical_device_properties_;
|
|
}
|
|
|
|
inline const VkPhysicalDeviceMaintenance4Properties &
|
|
physical_device_maintenance4_properties_get() const
|
|
{
|
|
return vk_physical_device_maintenance4_properties_;
|
|
}
|
|
|
|
const VkPhysicalDeviceIDProperties &physical_device_id_properties_get() const
|
|
{
|
|
return vk_physical_device_id_properties_;
|
|
}
|
|
|
|
inline const VkPhysicalDeviceDescriptorBufferPropertiesEXT &
|
|
physical_device_descriptor_buffer_properties_get() const
|
|
{
|
|
return vk_physical_device_descriptor_buffer_properties_;
|
|
}
|
|
|
|
const VkPhysicalDeviceFeatures &physical_device_features_get() const
|
|
{
|
|
return vk_physical_device_features_;
|
|
}
|
|
|
|
const VkPhysicalDeviceVulkan11Features &physical_device_vulkan_11_features_get() const
|
|
{
|
|
return vk_physical_device_vulkan_11_features_;
|
|
}
|
|
|
|
const VkPhysicalDeviceVulkan12Features &physical_device_vulkan_12_features_get() const
|
|
{
|
|
return vk_physical_device_vulkan_12_features_;
|
|
}
|
|
|
|
VkInstance instance_get() const
|
|
{
|
|
return vk_instance_;
|
|
};
|
|
|
|
VkDevice vk_handle() const
|
|
{
|
|
return vk_device_;
|
|
}
|
|
|
|
uint32_t queue_family_get() const
|
|
{
|
|
return vk_queue_family_;
|
|
}
|
|
|
|
inline VmaAllocator mem_allocator_get() const
|
|
{
|
|
return mem_allocator_;
|
|
}
|
|
|
|
VKDescriptorSetLayouts &descriptor_set_layouts_get()
|
|
{
|
|
return descriptor_set_layouts_;
|
|
}
|
|
|
|
debug::VKDebuggingTools &debugging_tools_get()
|
|
{
|
|
return debugging_tools_;
|
|
}
|
|
|
|
const debug::VKDebuggingTools &debugging_tools_get() const
|
|
{
|
|
return debugging_tools_;
|
|
}
|
|
|
|
const VKSamplers &samplers() const
|
|
{
|
|
return samplers_;
|
|
}
|
|
|
|
void init(void *ghost_context);
|
|
void reinit();
|
|
void deinit();
|
|
bool is_initialized() const
|
|
{
|
|
return is_initialized_;
|
|
}
|
|
|
|
GPUDeviceType device_type() const;
|
|
GPUDriverType driver_type() const;
|
|
std::string vendor_name() const;
|
|
std::string driver_version() const;
|
|
|
|
/**
|
|
* Check if a specific extension is supported by the device.
|
|
*
|
|
* This should be called from vk_backend to set the correct capabilities and workarounds needed
|
|
* for this device.
|
|
*/
|
|
bool supports_extension(const char *extension_name) const;
|
|
|
|
const VKWorkarounds &workarounds_get() const
|
|
{
|
|
return workarounds_;
|
|
}
|
|
inline const VKExtensions &extensions_get() const
|
|
{
|
|
return extensions_;
|
|
}
|
|
|
|
std::string glsl_vertex_patch_get() const;
|
|
std::string glsl_geometry_patch_get() const;
|
|
std::string glsl_fragment_patch_get() const;
|
|
std::string glsl_compute_patch_get() const;
|
|
shader::GeneratedSource extensions_define(StringRefNull stage_define) const;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/** \name Render graph
|
|
* \{ */
|
|
static void submission_runner(TaskPool *__restrict pool, void *task_data);
|
|
render_graph::VKRenderGraph *render_graph_new();
|
|
|
|
TimelineValue render_graph_submit(render_graph::VKRenderGraph *render_graph,
|
|
VKDiscardPool &context_discard_pool,
|
|
bool submit_to_device,
|
|
bool wait_for_completion,
|
|
VkPipelineStageFlags wait_dst_stage_mask,
|
|
VkSemaphore wait_semaphore,
|
|
VkSemaphore signal_semaphore,
|
|
VkFence signal_fence);
|
|
void wait_for_timeline(TimelineValue timeline);
|
|
void wait_queue_idle();
|
|
|
|
/**
|
|
* Retrieve the last finished submission timeline.
|
|
*/
|
|
TimelineValue submission_finished_timeline_get() const
|
|
{
|
|
BLI_assert(vk_timeline_semaphore_ != VK_NULL_HANDLE);
|
|
TimelineValue current_timeline;
|
|
VkResult result = vkGetSemaphoreCounterValue(
|
|
vk_device_, vk_timeline_semaphore_, ¤t_timeline);
|
|
UNUSED_VARS(result);
|
|
BLI_assert_msg(
|
|
result == VK_SUCCESS && current_timeline != UINT64_MAX,
|
|
"Potential driver crash has happened. Several drivers will report UINT64_MAX when "
|
|
"requesting a counter value of an timeline semaphore right after/during a driver reset. "
|
|
"If this happen we should investigate what makes the driver crash. In the past this has "
|
|
"been detected on QUALCOMM and NVIDIA drivers. The result code of the call is "
|
|
"VK_SUCCESS.");
|
|
return current_timeline;
|
|
}
|
|
|
|
/** \} */
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/** \name Resource management
|
|
* \{ */
|
|
|
|
/**
|
|
* Get or create current thread data.
|
|
*/
|
|
VKThreadData ¤t_thread_data();
|
|
|
|
#if 0
|
|
/**
|
|
* Get the discard pool for the current thread.
|
|
*
|
|
* When the active thread has a context a discard pool associated to the thread is returned.
|
|
* When there is no context the orphan discard pool is returned.
|
|
*
|
|
* A thread with a context can have multiple discard pools. One for each swap-chain image.
|
|
* A thread without a context is most likely a discarded resource triggered during dependency
|
|
* graph update. A dependency graph update from the viewport during playback or editing;
|
|
* or a dependency graph update when rendering.
|
|
* These can happen from a different thread which will don't have a context at all.
|
|
* \param thread_safe: Caller thread already owns the resources mutex and is safe to run this
|
|
* function without trying to reacquire resources mutex making a deadlock.
|
|
*/
|
|
VKDiscardPool &discard_pool_for_current_thread(bool thread_safe = false);
|
|
#endif
|
|
|
|
void context_register(VKContext &context);
|
|
void context_unregister(VKContext &context);
|
|
Span<std::reference_wrapper<VKContext>> contexts_get() const;
|
|
|
|
void memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb) const;
|
|
static void debug_print(std::ostream &os, const VKDiscardPool &discard_pool);
|
|
void debug_print();
|
|
|
|
/** \} */
|
|
|
|
Shader *vk_backbuffer_blit_sh_get()
|
|
{
|
|
if (vk_backbuffer_blit_sh_ == nullptr) {
|
|
/* See display_as_extended_srgb in libocio_display_processor.cc for details on this choice. */
|
|
#if defined(_WIN32) || defined(__APPLE__)
|
|
vk_backbuffer_blit_sh_ = GPU_shader_create_from_info_name("vk_backbuffer_blit");
|
|
#else
|
|
vk_backbuffer_blit_sh_ = GPU_shader_create_from_info_name("vk_backbuffer_blit_gamma22");
|
|
#endif
|
|
}
|
|
return vk_backbuffer_blit_sh_;
|
|
}
|
|
|
|
private:
|
|
void init_physical_device_properties();
|
|
void init_physical_device_memory_properties();
|
|
void init_physical_device_features();
|
|
void init_physical_device_extensions();
|
|
void init_debug_callbacks();
|
|
void init_submission_pool();
|
|
void deinit_submission_pool();
|
|
/**
|
|
* Initialize the functions struct with extension specific function pointer.
|
|
*/
|
|
void init_functions();
|
|
|
|
/**
|
|
* Initialize a dummy buffer that can be bound for missing attributes.
|
|
*/
|
|
void init_dummy_buffer();
|
|
|
|
/* During initialization the backend requires access to update the workarounds. */
|
|
friend VKBackend;
|
|
};
|
|
|
|
} // namespace blender::gpu
|