Vulkan: Swapchain synchronization

This PR adds swapchain synchronization. When the swapchain swaps the
buffers, it can now provide a wait semaphore and a signal semaphore to
support GPU-based synchronization.

Timings for 10 playbacks of `rain_restaurant.blend` on an AMD RX 7700:
Before: 10 × Animation playback: 72347.5540 ms, average: 7234.75539684 ms
After: 10 × Animation playback: 41523.2441 ms, average: 4152.32441425 ms

This brings playback performance to around the OpenGL performance target.

Pull Request: https://projects.blender.org/blender/blender/pulls/136259
This commit is contained in:
Jeroen Bakker
2025-03-24 10:28:52 +01:00
parent fa6e104362
commit 409ce2b976
7 changed files with 146 additions and 32 deletions

View File

@@ -746,6 +746,10 @@ typedef struct {
VkSurfaceFormatKHR surface_format;
/** Resolution of the image. */
VkExtent2D extent;
/** Semaphore to wait before updating the image. */
VkSemaphore acquire_semaphore;
/** Semaphore to signal after the image has been updated. */
VkSemaphore present_semaphore;
} GHOST_VulkanSwapChainData;
typedef struct {

View File

@@ -485,7 +485,7 @@ GHOST_ContextVK::GHOST_ContextVK(bool stereoVisual,
m_command_buffer(VK_NULL_HANDLE),
m_surface(VK_NULL_HANDLE),
m_swapchain(VK_NULL_HANDLE),
m_fence(VK_NULL_HANDLE)
m_render_frame(0)
{
}
@@ -523,10 +523,16 @@ GHOST_TSuccess GHOST_ContextVK::destroySwapchain()
if (m_swapchain != VK_NULL_HANDLE) {
vkDestroySwapchainKHR(device, m_swapchain, nullptr);
}
if (m_fence != VK_NULL_HANDLE) {
vkDestroyFence(device, m_fence, nullptr);
m_fence = VK_NULL_HANDLE;
VK_CHECK(vkDeviceWaitIdle(device));
for (VkSemaphore semaphore : m_acquire_semaphores) {
vkDestroySemaphore(device, semaphore, nullptr);
}
m_acquire_semaphores.clear();
for (VkSemaphore semaphore : m_present_semaphores) {
vkDestroySemaphore(device, semaphore, nullptr);
}
m_present_semaphores.clear();
return GHOST_kSuccess;
}
@@ -562,21 +568,27 @@ GHOST_TSuccess GHOST_ContextVK::swapBuffers()
* swapchain image. Other do it when calling vkQueuePresent. */
VkResult result = VK_ERROR_OUT_OF_DATE_KHR;
uint32_t image_index = 0;
int32_t render_frame = 0;
while (result == VK_ERROR_OUT_OF_DATE_KHR) {
result = vkAcquireNextImageKHR(
device, m_swapchain, UINT64_MAX, VK_NULL_HANDLE, m_fence, &image_index);
render_frame = (m_render_frame + 1) % m_acquire_semaphores.size();
result = vkAcquireNextImageKHR(device,
m_swapchain,
UINT64_MAX,
m_acquire_semaphores[render_frame],
VK_NULL_HANDLE,
&image_index);
if (result == VK_ERROR_OUT_OF_DATE_KHR) {
destroySwapchain();
createSwapchain();
}
}
VK_CHECK(vkWaitForFences(device, 1, &m_fence, VK_TRUE, UINT64_MAX));
VK_CHECK(vkResetFences(device, 1, &m_fence));
GHOST_VulkanSwapChainData swap_chain_data;
swap_chain_data.image = m_swapchain_images[image_index];
swap_chain_data.surface_format = m_surface_format;
swap_chain_data.extent = m_render_extent;
swap_chain_data.acquire_semaphore = m_acquire_semaphores[render_frame];
swap_chain_data.present_semaphore = m_present_semaphores[render_frame];
if (swap_buffers_pre_callback_) {
swap_buffers_pre_callback_(&swap_chain_data);
@@ -584,8 +596,8 @@ GHOST_TSuccess GHOST_ContextVK::swapBuffers()
VkPresentInfoKHR present_info = {};
present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
present_info.waitSemaphoreCount = 0;
present_info.pWaitSemaphores = nullptr;
present_info.waitSemaphoreCount = 1;
present_info.pWaitSemaphores = &m_present_semaphores[render_frame];
present_info.swapchainCount = 1;
present_info.pSwapchains = &m_swapchain;
present_info.pImageIndices = &image_index;
@@ -887,10 +899,17 @@ GHOST_TSuccess GHOST_ContextVK::createSwapchain()
vkGetSwapchainImagesKHR(device, m_swapchain, &image_count, nullptr);
m_swapchain_images.resize(image_count);
vkGetSwapchainImagesKHR(device, m_swapchain, &image_count, m_swapchain_images.data());
VkFenceCreateInfo fence_info = {};
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
VK_CHECK(vkCreateFence(device, &fence_info, nullptr, &m_fence));
const VkSemaphoreCreateInfo vk_semaphore_create_info = {
VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0};
m_acquire_semaphores.resize(image_count);
m_present_semaphores.resize(image_count);
for (int index = 0; index < image_count; index++) {
VK_CHECK(vkCreateSemaphore(
device, &vk_semaphore_create_info, nullptr, &m_acquire_semaphores[index]));
VK_CHECK(vkCreateSemaphore(
device, &vk_semaphore_create_info, nullptr, &m_present_semaphores[index]));
}
m_render_frame = 0;
/* Change image layout from VK_IMAGE_LAYOUT_UNDEFINED to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR. */
VkCommandBufferBeginInfo begin_info = {};

View File

@@ -183,11 +183,13 @@ class GHOST_ContextVK : public GHOST_Context {
VkSurfaceKHR m_surface;
VkSwapchainKHR m_swapchain;
std::vector<VkImage> m_swapchain_images;
std::vector<VkSemaphore> m_acquire_semaphores;
std::vector<VkSemaphore> m_present_semaphores;
uint32_t m_render_frame;
VkExtent2D m_render_extent;
VkExtent2D m_render_extent_min;
VkSurfaceFormatKHR m_surface_format;
VkFence m_fence;
std::function<void(const GHOST_VulkanSwapChainData *)> swap_buffers_pre_callback_;
std::function<void(void)> swap_buffers_post_callback_;

View File

@@ -145,7 +145,10 @@ void VKContext::end_frame()
void VKContext::flush() {}
TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags)
TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags,
VkPipelineStageFlags wait_dst_stage_mask,
VkSemaphore wait_semaphore,
VkSemaphore signal_semaphore)
{
if (has_active_framebuffer()) {
VKFrameBuffer &framebuffer = *active_framebuffer_get();
@@ -159,7 +162,10 @@ TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags)
&render_graph_.value().get(),
discard_pool,
bool(flags & RenderGraphFlushFlags::SUBMIT),
bool(flags & RenderGraphFlushFlags::WAIT_FOR_COMPLETION));
bool(flags & RenderGraphFlushFlags::WAIT_FOR_COMPLETION),
wait_dst_stage_mask,
wait_semaphore,
signal_semaphore);
render_graph_.reset();
if (bool(flags & RenderGraphFlushFlags::RENEW_RENDER_GRAPH)) {
render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
@@ -366,6 +372,8 @@ void VKContext::swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &swap_c
device.resources.add_image(swap_chain_data.image, 1, "SwapchainImage");
framebuffer.rendering_end(*this);
flush_render_graph(RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
render_graph::VKRenderGraph &render_graph = this->render_graph();
render_graph.add_node(blit_image);
GPU_debug_group_end();
@@ -375,8 +383,10 @@ void VKContext::swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &swap_c
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
render_graph.add_node(synchronization);
flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::RENEW_RENDER_GRAPH,
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
swap_chain_data.acquire_semaphore,
swap_chain_data.present_semaphore);
device.resources.remove_image(swap_chain_data.image);
#if 0

View File

@@ -71,7 +71,11 @@ class VKContext : public Context, NonCopyable {
void flush() override;
TimelineValue flush_render_graph(RenderGraphFlushFlags flags);
TimelineValue flush_render_graph(
RenderGraphFlushFlags flags,
VkPipelineStageFlags wait_dst_stage_mask = VK_PIPELINE_STAGE_NONE,
VkSemaphore wait_semaphore = VK_NULL_HANDLE,
VkSemaphore signal_semaphore = VK_NULL_HANDLE);
void finish() override;
void memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb) override;

View File

@@ -351,7 +351,10 @@ class VKDevice : public NonCopyable {
TimelineValue render_graph_submit(render_graph::VKRenderGraph *render_graph,
VKDiscardPool &context_discard_pool,
bool submit_to_device,
bool wait_for_completion);
bool wait_for_completion,
VkPipelineStageFlags wait_dst_stage_mask,
VkSemaphore wait_semaphore,
VkSemaphore signal_semaphore);
void wait_for_timeline(TimelineValue timeline);
/**

View File

@@ -6,6 +6,9 @@
* \ingroup gpu
*/
#include <chrono>
#include <thread>
#include "vk_device.hh"
namespace blender::gpu {
@@ -18,12 +21,19 @@ struct VKRenderGraphSubmitTask {
render_graph::VKRenderGraph *render_graph;
uint64_t timeline;
bool submit_to_device;
VkPipelineStageFlags wait_dst_stage_mask;
VkSemaphore wait_semaphore;
VkSemaphore signal_semaphore;
bool *is_submitted_ptr;
};
TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_graph,
VKDiscardPool &context_discard_pool,
bool submit_to_device,
bool wait_for_completion)
bool wait_for_completion,
VkPipelineStageFlags wait_dst_stage_mask,
VkSemaphore wait_semaphore,
VkSemaphore signal_semaphore)
{
if (render_graph->is_empty()) {
render_graph->reset();
@@ -34,13 +44,32 @@ TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_
VKRenderGraphSubmitTask *submit_task = MEM_new<VKRenderGraphSubmitTask>(__func__);
submit_task->render_graph = render_graph;
submit_task->submit_to_device = submit_to_device;
submit_task->wait_dst_stage_mask = wait_dst_stage_mask;
submit_task->wait_semaphore = wait_semaphore;
submit_task->signal_semaphore = signal_semaphore;
submit_task->is_submitted_ptr = nullptr;
/* We need to wait for submission as otherwise the signal semaphore can still not be in an
* initial state. */
const bool wait_for_submission = signal_semaphore != VK_NULL_HANDLE && !wait_for_completion;
bool is_submitted = false;
if (wait_for_submission) {
submit_task->is_submitted_ptr = &is_submitted;
}
TimelineValue timeline = submit_task->timeline = submit_to_device ? ++timeline_value_ :
timeline_value_ + 1;
orphaned_data.timeline_ = timeline + 1;
orphaned_data.move_data(context_discard_pool, timeline);
BLI_thread_queue_push(submitted_render_graphs_, submit_task);
submit_task = nullptr;
if (wait_for_submission) {
while (!is_submitted) {
using namespace std::chrono_literals;
std::this_thread::sleep_for(1ns);
}
}
if (wait_for_completion) {
wait_for_timeline(timeline);
}
@@ -89,6 +118,9 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
Vector<VkCommandBuffer> command_buffers_unused;
TimelineResources<VkCommandBuffer> command_buffers_in_use;
VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE;
Vector<VkCommandBuffer> unsubmitted_command_buffers;
Vector<VkSubmitInfo> submit_infos;
submit_infos.reserve(2);
std::optional<render_graph::VKCommandBufferWrapper> command_buffer;
while (device->lifetime < Lifetime::DEINITIALIZING) {
@@ -98,6 +130,15 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
continue;
}
/* End current command buffer when we need to wait for a semaphore. In this case all previous
* recorded commands can run before the wait semaphores. The commands that must be guarded by
* the semaphores are part of the new submitted render graph. */
if (submit_task->wait_semaphore != VK_NULL_HANDLE && command_buffer.has_value()) {
command_buffer->end_recording();
unsubmitted_command_buffers.append(vk_command_buffer);
command_buffer.reset();
}
if (!command_buffer.has_value()) {
/* Check for completed command buffers that can be reused. */
if (command_buffers_unused.is_empty()) {
@@ -138,30 +179,61 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
command_builder.record_commands(render_graph, *command_buffer, node_handles);
if (submit_task->submit_to_device) {
/* Create submit infos for previous command buffers. */
submit_infos.clear();
if (!unsubmitted_command_buffers.is_empty()) {
VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
nullptr,
0,
nullptr,
nullptr,
uint32_t(unsubmitted_command_buffers.size()),
unsubmitted_command_buffers.data(),
0,
nullptr};
submit_infos.append(vk_submit_info);
}
/* Finalize current command buffer. */
command_buffer->end_recording();
unsubmitted_command_buffers.append(vk_command_buffer);
uint32_t wait_semaphore_len = submit_task->wait_semaphore == VK_NULL_HANDLE ? 0 : 1;
uint32_t signal_semaphore_len = submit_task->signal_semaphore == VK_NULL_HANDLE ? 1 : 2;
VkSemaphore signal_semaphores[2] = {device->vk_timeline_semaphore_,
submit_task->signal_semaphore};
uint64_t signal_semaphore_values[2] = {submit_task->timeline, 0};
VkTimelineSemaphoreSubmitInfo vk_timeline_semaphore_submit_info = {
VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
nullptr,
0,
nullptr,
1,
&submit_task->timeline};
signal_semaphore_len,
signal_semaphore_values};
VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
&vk_timeline_semaphore_submit_info,
0,
nullptr,
nullptr,
wait_semaphore_len,
&submit_task->wait_semaphore,
&submit_task->wait_dst_stage_mask,
1,
&vk_command_buffer,
1,
&device->vk_timeline_semaphore_};
&unsubmitted_command_buffers.last(),
signal_semaphore_len,
signal_semaphores};
submit_infos.append(vk_submit_info);
{
std::scoped_lock lock_queue(*device->queue_mutex_);
vkQueueSubmit(device->vk_queue_, 1, &vk_submit_info, VK_NULL_HANDLE);
vkQueueSubmit(device->vk_queue_, submit_infos.size(), submit_infos.data(), VK_NULL_HANDLE);
}
if (submit_task->is_submitted_ptr != nullptr) {
*submit_task->is_submitted_ptr = true;
}
command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer);
vk_command_buffer = VK_NULL_HANDLE;
for (VkCommandBuffer vk_command_buffer : unsubmitted_command_buffers) {
command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer);
}
unsubmitted_command_buffers.clear();
command_buffer.reset();
}