Vulkan: Swapchain synchronization
This PR adds swapchain synchronization. When the swapchain swaps the buffers, it can now add a wait semaphore and a signal semaphore to support GPU-based synchronization. Benchmark: 10 playbacks of `rain_restaurant.blend` on an AMD RX 7700. Before: 10 × Animation playback: 72347.5540 ms, average: 7234.75539684 ms. After: 10 × Animation playback: 41523.2441 ms, average: 4152.32441425 ms. This brings performance to around the OpenGL performance target. Pull Request: https://projects.blender.org/blender/blender/pulls/136259
This commit is contained in:
@@ -746,6 +746,10 @@ typedef struct {
|
||||
VkSurfaceFormatKHR surface_format;
|
||||
/** Resolution of the image. */
|
||||
VkExtent2D extent;
|
||||
/** Semaphore to wait before updating the image. */
|
||||
VkSemaphore acquire_semaphore;
|
||||
/** Semaphore to signal after the image has been updated. */
|
||||
VkSemaphore present_semaphore;
|
||||
} GHOST_VulkanSwapChainData;
|
||||
|
||||
typedef struct {
|
||||
|
||||
@@ -485,7 +485,7 @@ GHOST_ContextVK::GHOST_ContextVK(bool stereoVisual,
|
||||
m_command_buffer(VK_NULL_HANDLE),
|
||||
m_surface(VK_NULL_HANDLE),
|
||||
m_swapchain(VK_NULL_HANDLE),
|
||||
m_fence(VK_NULL_HANDLE)
|
||||
m_render_frame(0)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -523,10 +523,16 @@ GHOST_TSuccess GHOST_ContextVK::destroySwapchain()
|
||||
if (m_swapchain != VK_NULL_HANDLE) {
|
||||
vkDestroySwapchainKHR(device, m_swapchain, nullptr);
|
||||
}
|
||||
if (m_fence != VK_NULL_HANDLE) {
|
||||
vkDestroyFence(device, m_fence, nullptr);
|
||||
m_fence = VK_NULL_HANDLE;
|
||||
VK_CHECK(vkDeviceWaitIdle(device));
|
||||
for (VkSemaphore semaphore : m_acquire_semaphores) {
|
||||
vkDestroySemaphore(device, semaphore, nullptr);
|
||||
}
|
||||
m_acquire_semaphores.clear();
|
||||
for (VkSemaphore semaphore : m_present_semaphores) {
|
||||
vkDestroySemaphore(device, semaphore, nullptr);
|
||||
}
|
||||
m_present_semaphores.clear();
|
||||
|
||||
return GHOST_kSuccess;
|
||||
}
|
||||
|
||||
@@ -562,21 +568,27 @@ GHOST_TSuccess GHOST_ContextVK::swapBuffers()
|
||||
* swapchain image. Others do it when calling vkQueuePresent. */
|
||||
VkResult result = VK_ERROR_OUT_OF_DATE_KHR;
|
||||
uint32_t image_index = 0;
|
||||
int32_t render_frame = 0;
|
||||
while (result == VK_ERROR_OUT_OF_DATE_KHR) {
|
||||
result = vkAcquireNextImageKHR(
|
||||
device, m_swapchain, UINT64_MAX, VK_NULL_HANDLE, m_fence, &image_index);
|
||||
render_frame = (m_render_frame + 1) % m_acquire_semaphores.size();
|
||||
result = vkAcquireNextImageKHR(device,
|
||||
m_swapchain,
|
||||
UINT64_MAX,
|
||||
m_acquire_semaphores[render_frame],
|
||||
VK_NULL_HANDLE,
|
||||
&image_index);
|
||||
if (result == VK_ERROR_OUT_OF_DATE_KHR) {
|
||||
destroySwapchain();
|
||||
createSwapchain();
|
||||
}
|
||||
}
|
||||
VK_CHECK(vkWaitForFences(device, 1, &m_fence, VK_TRUE, UINT64_MAX));
|
||||
VK_CHECK(vkResetFences(device, 1, &m_fence));
|
||||
|
||||
GHOST_VulkanSwapChainData swap_chain_data;
|
||||
swap_chain_data.image = m_swapchain_images[image_index];
|
||||
swap_chain_data.surface_format = m_surface_format;
|
||||
swap_chain_data.extent = m_render_extent;
|
||||
swap_chain_data.acquire_semaphore = m_acquire_semaphores[render_frame];
|
||||
swap_chain_data.present_semaphore = m_present_semaphores[render_frame];
|
||||
|
||||
if (swap_buffers_pre_callback_) {
|
||||
swap_buffers_pre_callback_(&swap_chain_data);
|
||||
@@ -584,8 +596,8 @@ GHOST_TSuccess GHOST_ContextVK::swapBuffers()
|
||||
|
||||
VkPresentInfoKHR present_info = {};
|
||||
present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
|
||||
present_info.waitSemaphoreCount = 0;
|
||||
present_info.pWaitSemaphores = nullptr;
|
||||
present_info.waitSemaphoreCount = 1;
|
||||
present_info.pWaitSemaphores = &m_present_semaphores[render_frame];
|
||||
present_info.swapchainCount = 1;
|
||||
present_info.pSwapchains = &m_swapchain;
|
||||
present_info.pImageIndices = &image_index;
|
||||
@@ -887,10 +899,17 @@ GHOST_TSuccess GHOST_ContextVK::createSwapchain()
|
||||
vkGetSwapchainImagesKHR(device, m_swapchain, &image_count, nullptr);
|
||||
m_swapchain_images.resize(image_count);
|
||||
vkGetSwapchainImagesKHR(device, m_swapchain, &image_count, m_swapchain_images.data());
|
||||
|
||||
VkFenceCreateInfo fence_info = {};
|
||||
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
VK_CHECK(vkCreateFence(device, &fence_info, nullptr, &m_fence));
|
||||
const VkSemaphoreCreateInfo vk_semaphore_create_info = {
|
||||
VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0};
|
||||
m_acquire_semaphores.resize(image_count);
|
||||
m_present_semaphores.resize(image_count);
|
||||
for (int index = 0; index < image_count; index++) {
|
||||
VK_CHECK(vkCreateSemaphore(
|
||||
device, &vk_semaphore_create_info, nullptr, &m_acquire_semaphores[index]));
|
||||
VK_CHECK(vkCreateSemaphore(
|
||||
device, &vk_semaphore_create_info, nullptr, &m_present_semaphores[index]));
|
||||
}
|
||||
m_render_frame = 0;
|
||||
|
||||
/* Change image layout from VK_IMAGE_LAYOUT_UNDEFINED to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR. */
|
||||
VkCommandBufferBeginInfo begin_info = {};
|
||||
|
||||
@@ -183,11 +183,13 @@ class GHOST_ContextVK : public GHOST_Context {
|
||||
VkSurfaceKHR m_surface;
|
||||
VkSwapchainKHR m_swapchain;
|
||||
std::vector<VkImage> m_swapchain_images;
|
||||
std::vector<VkSemaphore> m_acquire_semaphores;
|
||||
std::vector<VkSemaphore> m_present_semaphores;
|
||||
uint32_t m_render_frame;
|
||||
|
||||
VkExtent2D m_render_extent;
|
||||
VkExtent2D m_render_extent_min;
|
||||
VkSurfaceFormatKHR m_surface_format;
|
||||
VkFence m_fence;
|
||||
|
||||
std::function<void(const GHOST_VulkanSwapChainData *)> swap_buffers_pre_callback_;
|
||||
std::function<void(void)> swap_buffers_post_callback_;
|
||||
|
||||
@@ -145,7 +145,10 @@ void VKContext::end_frame()
|
||||
|
||||
void VKContext::flush() {}
|
||||
|
||||
TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags)
|
||||
TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags,
|
||||
VkPipelineStageFlags wait_dst_stage_mask,
|
||||
VkSemaphore wait_semaphore,
|
||||
VkSemaphore signal_semaphore)
|
||||
{
|
||||
if (has_active_framebuffer()) {
|
||||
VKFrameBuffer &framebuffer = *active_framebuffer_get();
|
||||
@@ -159,7 +162,10 @@ TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags)
|
||||
&render_graph_.value().get(),
|
||||
discard_pool,
|
||||
bool(flags & RenderGraphFlushFlags::SUBMIT),
|
||||
bool(flags & RenderGraphFlushFlags::WAIT_FOR_COMPLETION));
|
||||
bool(flags & RenderGraphFlushFlags::WAIT_FOR_COMPLETION),
|
||||
wait_dst_stage_mask,
|
||||
wait_semaphore,
|
||||
signal_semaphore);
|
||||
render_graph_.reset();
|
||||
if (bool(flags & RenderGraphFlushFlags::RENEW_RENDER_GRAPH)) {
|
||||
render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
|
||||
@@ -366,6 +372,8 @@ void VKContext::swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &swap_c
|
||||
device.resources.add_image(swap_chain_data.image, 1, "SwapchainImage");
|
||||
|
||||
framebuffer.rendering_end(*this);
|
||||
flush_render_graph(RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
|
||||
|
||||
render_graph::VKRenderGraph &render_graph = this->render_graph();
|
||||
render_graph.add_node(blit_image);
|
||||
GPU_debug_group_end();
|
||||
@@ -375,8 +383,10 @@ void VKContext::swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &swap_c
|
||||
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
render_graph.add_node(synchronization);
|
||||
flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
|
||||
RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
|
||||
flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::RENEW_RENDER_GRAPH,
|
||||
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
swap_chain_data.acquire_semaphore,
|
||||
swap_chain_data.present_semaphore);
|
||||
|
||||
device.resources.remove_image(swap_chain_data.image);
|
||||
#if 0
|
||||
|
||||
@@ -71,7 +71,11 @@ class VKContext : public Context, NonCopyable {
|
||||
|
||||
void flush() override;
|
||||
|
||||
TimelineValue flush_render_graph(RenderGraphFlushFlags flags);
|
||||
TimelineValue flush_render_graph(
|
||||
RenderGraphFlushFlags flags,
|
||||
VkPipelineStageFlags wait_dst_stage_mask = VK_PIPELINE_STAGE_NONE,
|
||||
VkSemaphore wait_semaphore = VK_NULL_HANDLE,
|
||||
VkSemaphore signal_semaphore = VK_NULL_HANDLE);
|
||||
void finish() override;
|
||||
|
||||
void memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb) override;
|
||||
|
||||
@@ -351,7 +351,10 @@ class VKDevice : public NonCopyable {
|
||||
TimelineValue render_graph_submit(render_graph::VKRenderGraph *render_graph,
|
||||
VKDiscardPool &context_discard_pool,
|
||||
bool submit_to_device,
|
||||
bool wait_for_completion);
|
||||
bool wait_for_completion,
|
||||
VkPipelineStageFlags wait_dst_stage_mask,
|
||||
VkSemaphore wait_semaphore,
|
||||
VkSemaphore signal_semaphore);
|
||||
void wait_for_timeline(TimelineValue timeline);
|
||||
|
||||
/**
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
* \ingroup gpu
|
||||
*/
|
||||
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
#include "vk_device.hh"
|
||||
|
||||
namespace blender::gpu {
|
||||
@@ -18,12 +21,19 @@ struct VKRenderGraphSubmitTask {
|
||||
render_graph::VKRenderGraph *render_graph;
|
||||
uint64_t timeline;
|
||||
bool submit_to_device;
|
||||
VkPipelineStageFlags wait_dst_stage_mask;
|
||||
VkSemaphore wait_semaphore;
|
||||
VkSemaphore signal_semaphore;
|
||||
bool *is_submitted_ptr;
|
||||
};
|
||||
|
||||
TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_graph,
|
||||
VKDiscardPool &context_discard_pool,
|
||||
bool submit_to_device,
|
||||
bool wait_for_completion)
|
||||
bool wait_for_completion,
|
||||
VkPipelineStageFlags wait_dst_stage_mask,
|
||||
VkSemaphore wait_semaphore,
|
||||
VkSemaphore signal_semaphore)
|
||||
{
|
||||
if (render_graph->is_empty()) {
|
||||
render_graph->reset();
|
||||
@@ -34,13 +44,32 @@ TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_
|
||||
VKRenderGraphSubmitTask *submit_task = MEM_new<VKRenderGraphSubmitTask>(__func__);
|
||||
submit_task->render_graph = render_graph;
|
||||
submit_task->submit_to_device = submit_to_device;
|
||||
submit_task->wait_dst_stage_mask = wait_dst_stage_mask;
|
||||
submit_task->wait_semaphore = wait_semaphore;
|
||||
submit_task->signal_semaphore = signal_semaphore;
|
||||
submit_task->is_submitted_ptr = nullptr;
|
||||
/* We need to wait for submission as otherwise the signal semaphore can still not be in an
|
||||
* initial state. */
|
||||
const bool wait_for_submission = signal_semaphore != VK_NULL_HANDLE && !wait_for_completion;
|
||||
bool is_submitted = false;
|
||||
if (wait_for_submission) {
|
||||
submit_task->is_submitted_ptr = &is_submitted;
|
||||
}
|
||||
TimelineValue timeline = submit_task->timeline = submit_to_device ? ++timeline_value_ :
|
||||
timeline_value_ + 1;
|
||||
orphaned_data.timeline_ = timeline + 1;
|
||||
orphaned_data.move_data(context_discard_pool, timeline);
|
||||
|
||||
BLI_thread_queue_push(submitted_render_graphs_, submit_task);
|
||||
submit_task = nullptr;
|
||||
|
||||
if (wait_for_submission) {
|
||||
while (!is_submitted) {
|
||||
using namespace std::chrono_literals;
|
||||
std::this_thread::sleep_for(1ns);
|
||||
}
|
||||
}
|
||||
|
||||
if (wait_for_completion) {
|
||||
wait_for_timeline(timeline);
|
||||
}
|
||||
@@ -89,6 +118,9 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
|
||||
Vector<VkCommandBuffer> command_buffers_unused;
|
||||
TimelineResources<VkCommandBuffer> command_buffers_in_use;
|
||||
VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE;
|
||||
Vector<VkCommandBuffer> unsubmitted_command_buffers;
|
||||
Vector<VkSubmitInfo> submit_infos;
|
||||
submit_infos.reserve(2);
|
||||
std::optional<render_graph::VKCommandBufferWrapper> command_buffer;
|
||||
|
||||
while (device->lifetime < Lifetime::DEINITIALIZING) {
|
||||
@@ -98,6 +130,15 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
|
||||
continue;
|
||||
}
|
||||
|
||||
/* End current command buffer when we need to wait for a semaphore. In this case all previous
|
||||
* recorded commands can run before the wait semaphores. The commands that must be guarded by
|
||||
* the semaphores are part of the new submitted render graph. */
|
||||
if (submit_task->wait_semaphore != VK_NULL_HANDLE && command_buffer.has_value()) {
|
||||
command_buffer->end_recording();
|
||||
unsubmitted_command_buffers.append(vk_command_buffer);
|
||||
command_buffer.reset();
|
||||
}
|
||||
|
||||
if (!command_buffer.has_value()) {
|
||||
/* Check for completed command buffers that can be reused. */
|
||||
if (command_buffers_unused.is_empty()) {
|
||||
@@ -138,30 +179,61 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
|
||||
command_builder.record_commands(render_graph, *command_buffer, node_handles);
|
||||
|
||||
if (submit_task->submit_to_device) {
|
||||
/* Create submit infos for previous command buffers. */
|
||||
submit_infos.clear();
|
||||
if (!unsubmitted_command_buffers.is_empty()) {
|
||||
VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr,
|
||||
uint32_t(unsubmitted_command_buffers.size()),
|
||||
unsubmitted_command_buffers.data(),
|
||||
0,
|
||||
nullptr};
|
||||
submit_infos.append(vk_submit_info);
|
||||
}
|
||||
|
||||
/* Finalize current command buffer. */
|
||||
command_buffer->end_recording();
|
||||
unsubmitted_command_buffers.append(vk_command_buffer);
|
||||
|
||||
uint32_t wait_semaphore_len = submit_task->wait_semaphore == VK_NULL_HANDLE ? 0 : 1;
|
||||
uint32_t signal_semaphore_len = submit_task->signal_semaphore == VK_NULL_HANDLE ? 1 : 2;
|
||||
VkSemaphore signal_semaphores[2] = {device->vk_timeline_semaphore_,
|
||||
submit_task->signal_semaphore};
|
||||
uint64_t signal_semaphore_values[2] = {submit_task->timeline, 0};
|
||||
|
||||
VkTimelineSemaphoreSubmitInfo vk_timeline_semaphore_submit_info = {
|
||||
VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
|
||||
nullptr,
|
||||
0,
|
||||
nullptr,
|
||||
1,
|
||||
&submit_task->timeline};
|
||||
signal_semaphore_len,
|
||||
signal_semaphore_values};
|
||||
VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
|
||||
&vk_timeline_semaphore_submit_info,
|
||||
0,
|
||||
nullptr,
|
||||
nullptr,
|
||||
wait_semaphore_len,
|
||||
&submit_task->wait_semaphore,
|
||||
&submit_task->wait_dst_stage_mask,
|
||||
1,
|
||||
&vk_command_buffer,
|
||||
1,
|
||||
&device->vk_timeline_semaphore_};
|
||||
&unsubmitted_command_buffers.last(),
|
||||
signal_semaphore_len,
|
||||
signal_semaphores};
|
||||
submit_infos.append(vk_submit_info);
|
||||
|
||||
{
|
||||
std::scoped_lock lock_queue(*device->queue_mutex_);
|
||||
vkQueueSubmit(device->vk_queue_, 1, &vk_submit_info, VK_NULL_HANDLE);
|
||||
vkQueueSubmit(device->vk_queue_, submit_infos.size(), submit_infos.data(), VK_NULL_HANDLE);
|
||||
}
|
||||
if (submit_task->is_submitted_ptr != nullptr) {
|
||||
*submit_task->is_submitted_ptr = true;
|
||||
}
|
||||
command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer);
|
||||
vk_command_buffer = VK_NULL_HANDLE;
|
||||
for (VkCommandBuffer vk_command_buffer : unsubmitted_command_buffers) {
|
||||
command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer);
|
||||
}
|
||||
unsubmitted_command_buffers.clear();
|
||||
command_buffer.reset();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user