diff --git a/intern/ghost/GHOST_Types.h b/intern/ghost/GHOST_Types.h index 80a0f370b0a..130b9097b4d 100644 --- a/intern/ghost/GHOST_Types.h +++ b/intern/ghost/GHOST_Types.h @@ -746,6 +746,10 @@ typedef struct { VkSurfaceFormatKHR surface_format; /** Resolution of the image. */ VkExtent2D extent; + /** Semaphore to wait before updating the image. */ + VkSemaphore acquire_semaphore; + /** Semaphore to signal after the image has been updated. */ + VkSemaphore present_semaphore; } GHOST_VulkanSwapChainData; typedef struct { diff --git a/intern/ghost/intern/GHOST_ContextVK.cc b/intern/ghost/intern/GHOST_ContextVK.cc index a140a9ddf88..bb1db574edf 100644 --- a/intern/ghost/intern/GHOST_ContextVK.cc +++ b/intern/ghost/intern/GHOST_ContextVK.cc @@ -485,7 +485,7 @@ GHOST_ContextVK::GHOST_ContextVK(bool stereoVisual, m_command_buffer(VK_NULL_HANDLE), m_surface(VK_NULL_HANDLE), m_swapchain(VK_NULL_HANDLE), - m_fence(VK_NULL_HANDLE) + m_render_frame(0) { } @@ -523,10 +523,16 @@ GHOST_TSuccess GHOST_ContextVK::destroySwapchain() if (m_swapchain != VK_NULL_HANDLE) { vkDestroySwapchainKHR(device, m_swapchain, nullptr); } - if (m_fence != VK_NULL_HANDLE) { - vkDestroyFence(device, m_fence, nullptr); - m_fence = VK_NULL_HANDLE; + VK_CHECK(vkDeviceWaitIdle(device)); + for (VkSemaphore semaphore : m_acquire_semaphores) { + vkDestroySemaphore(device, semaphore, nullptr); } + m_acquire_semaphores.clear(); + for (VkSemaphore semaphore : m_present_semaphores) { + vkDestroySemaphore(device, semaphore, nullptr); + } + m_present_semaphores.clear(); + return GHOST_kSuccess; } @@ -562,21 +568,27 @@ GHOST_TSuccess GHOST_ContextVK::swapBuffers() * swapchain image. Other do it when calling vkQueuePresent. */ VkResult result = VK_ERROR_OUT_OF_DATE_KHR; uint32_t image_index = 0; + int32_t render_frame = 0; while (result == VK_ERROR_OUT_OF_DATE_KHR) { - result = vkAcquireNextImageKHR( - device, m_swapchain, UINT64_MAX, VK_NULL_HANDLE, m_fence, &image_index); + render_frame = (m_render_frame + 1) % m_acquire_semaphores.size(); + result = vkAcquireNextImageKHR(device, + m_swapchain, + UINT64_MAX, + m_acquire_semaphores[render_frame], + VK_NULL_HANDLE, + &image_index); if (result == VK_ERROR_OUT_OF_DATE_KHR) { destroySwapchain(); createSwapchain(); } } - VK_CHECK(vkWaitForFences(device, 1, &m_fence, VK_TRUE, UINT64_MAX)); - VK_CHECK(vkResetFences(device, 1, &m_fence)); GHOST_VulkanSwapChainData swap_chain_data; swap_chain_data.image = m_swapchain_images[image_index]; swap_chain_data.surface_format = m_surface_format; swap_chain_data.extent = m_render_extent; + swap_chain_data.acquire_semaphore = m_acquire_semaphores[render_frame]; + swap_chain_data.present_semaphore = m_present_semaphores[render_frame]; if (swap_buffers_pre_callback_) { swap_buffers_pre_callback_(&swap_chain_data); @@ -584,8 +596,8 @@ GHOST_TSuccess GHOST_ContextVK::swapBuffers() VkPresentInfoKHR present_info = {}; present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; - present_info.waitSemaphoreCount = 0; - present_info.pWaitSemaphores = nullptr; + present_info.waitSemaphoreCount = 1; + present_info.pWaitSemaphores = &m_present_semaphores[render_frame]; present_info.swapchainCount = 1; present_info.pSwapchains = &m_swapchain; present_info.pImageIndices = &image_index; @@ -887,10 +899,17 @@ GHOST_TSuccess GHOST_ContextVK::createSwapchain() vkGetSwapchainImagesKHR(device, m_swapchain, &image_count, nullptr); m_swapchain_images.resize(image_count); vkGetSwapchainImagesKHR(device, m_swapchain, &image_count, m_swapchain_images.data()); - - VkFenceCreateInfo fence_info = {}; - fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; - VK_CHECK(vkCreateFence(device, &fence_info, nullptr, &m_fence)); + const VkSemaphoreCreateInfo vk_semaphore_create_info = { + VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0}; + m_acquire_semaphores.resize(image_count); + m_present_semaphores.resize(image_count); + for (int index = 0; index < image_count; index++) { + VK_CHECK(vkCreateSemaphore( + device, &vk_semaphore_create_info, nullptr, &m_acquire_semaphores[index])); + VK_CHECK(vkCreateSemaphore( + device, &vk_semaphore_create_info, nullptr, &m_present_semaphores[index])); + } + m_render_frame = 0; /* Change image layout from VK_IMAGE_LAYOUT_UNDEFINED to VK_IMAGE_LAYOUT_PRESENT_SRC_KHR. */ VkCommandBufferBeginInfo begin_info = {}; diff --git a/intern/ghost/intern/GHOST_ContextVK.hh b/intern/ghost/intern/GHOST_ContextVK.hh index e9d52ae2c92..62cb98e538f 100644 --- a/intern/ghost/intern/GHOST_ContextVK.hh +++ b/intern/ghost/intern/GHOST_ContextVK.hh @@ -183,11 +183,13 @@ class GHOST_ContextVK : public GHOST_Context { VkSurfaceKHR m_surface; VkSwapchainKHR m_swapchain; std::vector m_swapchain_images; + std::vector m_acquire_semaphores; + std::vector m_present_semaphores; + uint32_t m_render_frame; VkExtent2D m_render_extent; VkExtent2D m_render_extent_min; VkSurfaceFormatKHR m_surface_format; - VkFence m_fence; std::function swap_buffers_pre_callback_; std::function swap_buffers_post_callback_; diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc index 0600293bc6d..adbf667f662 100644 --- a/source/blender/gpu/vulkan/vk_context.cc +++ b/source/blender/gpu/vulkan/vk_context.cc @@ -145,7 +145,10 @@ void VKContext::end_frame() void VKContext::flush() {} -TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags) +TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags, + VkPipelineStageFlags wait_dst_stage_mask, + VkSemaphore wait_semaphore, + VkSemaphore signal_semaphore) { if (has_active_framebuffer()) { VKFrameBuffer &framebuffer = *active_framebuffer_get(); @@ -159,7 +162,10 @@ TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags) &render_graph_.value().get(), discard_pool, bool(flags & RenderGraphFlushFlags::SUBMIT), - bool(flags & RenderGraphFlushFlags::WAIT_FOR_COMPLETION)); + bool(flags & RenderGraphFlushFlags::WAIT_FOR_COMPLETION), + wait_dst_stage_mask, + wait_semaphore, + signal_semaphore); render_graph_.reset(); if (bool(flags & RenderGraphFlushFlags::RENEW_RENDER_GRAPH)) { render_graph_ = std::reference_wrapper( @@ -366,6 +372,8 @@ void VKContext::swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &swap_c device.resources.add_image(swap_chain_data.image, 1, "SwapchainImage"); framebuffer.rendering_end(*this); + flush_render_graph(RenderGraphFlushFlags::RENEW_RENDER_GRAPH); + render_graph::VKRenderGraph &render_graph = this->render_graph(); render_graph.add_node(blit_image); GPU_debug_group_end(); @@ -375,8 +383,10 @@ void VKContext::swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &swap_c synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; render_graph.add_node(synchronization); - flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION | - RenderGraphFlushFlags::RENEW_RENDER_GRAPH); + flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::RENEW_RENDER_GRAPH, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, + swap_chain_data.acquire_semaphore, + swap_chain_data.present_semaphore); device.resources.remove_image(swap_chain_data.image); #if 0 diff --git a/source/blender/gpu/vulkan/vk_context.hh b/source/blender/gpu/vulkan/vk_context.hh index 5f3ddf0792c..0c1933248dc 100644 --- a/source/blender/gpu/vulkan/vk_context.hh +++ b/source/blender/gpu/vulkan/vk_context.hh @@ -71,7 +71,11 @@ class VKContext : public Context, NonCopyable { void flush() override; - TimelineValue flush_render_graph(RenderGraphFlushFlags flags); + TimelineValue flush_render_graph( + RenderGraphFlushFlags flags, + VkPipelineStageFlags wait_dst_stage_mask = VK_PIPELINE_STAGE_NONE, + VkSemaphore wait_semaphore = VK_NULL_HANDLE, + VkSemaphore signal_semaphore = VK_NULL_HANDLE); void finish() override; void memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb) override; diff --git a/source/blender/gpu/vulkan/vk_device.hh b/source/blender/gpu/vulkan/vk_device.hh index 8a6da9c69b6..d21eb888c7c 100644 --- a/source/blender/gpu/vulkan/vk_device.hh +++ b/source/blender/gpu/vulkan/vk_device.hh @@ -351,7 +351,10 @@ class VKDevice : public NonCopyable { TimelineValue render_graph_submit(render_graph::VKRenderGraph *render_graph, VKDiscardPool &context_discard_pool, bool submit_to_device, - bool wait_for_completion); + bool wait_for_completion, + VkPipelineStageFlags wait_dst_stage_mask, + VkSemaphore wait_semaphore, + VkSemaphore signal_semaphore); void wait_for_timeline(TimelineValue timeline); /** diff --git a/source/blender/gpu/vulkan/vk_device_submission.cc b/source/blender/gpu/vulkan/vk_device_submission.cc index 5c0c5082539..61c4cdb894a 100644 --- a/source/blender/gpu/vulkan/vk_device_submission.cc +++ b/source/blender/gpu/vulkan/vk_device_submission.cc @@ -6,6 +6,9 @@ * \ingroup gpu */ +#include +#include + #include "vk_device.hh" namespace blender::gpu { @@ -18,12 +21,19 @@ struct VKRenderGraphSubmitTask { render_graph::VKRenderGraph *render_graph; uint64_t timeline; bool submit_to_device; + VkPipelineStageFlags wait_dst_stage_mask; + VkSemaphore wait_semaphore; + VkSemaphore signal_semaphore; + bool *is_submitted_ptr; }; TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_graph, VKDiscardPool &context_discard_pool, bool submit_to_device, - bool wait_for_completion) + bool wait_for_completion, + VkPipelineStageFlags wait_dst_stage_mask, + VkSemaphore wait_semaphore, + VkSemaphore signal_semaphore) { if (render_graph->is_empty()) { render_graph->reset(); @@ -34,13 +44,32 @@ TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_ VKRenderGraphSubmitTask *submit_task = MEM_new(__func__); submit_task->render_graph = render_graph; submit_task->submit_to_device = submit_to_device; + submit_task->wait_dst_stage_mask = wait_dst_stage_mask; + submit_task->wait_semaphore = wait_semaphore; + submit_task->signal_semaphore = signal_semaphore; + submit_task->is_submitted_ptr = nullptr; + /* We need to wait for submission as otherwise the signal semaphore can still not be in an + * initial state. */ + const bool wait_for_submission = signal_semaphore != VK_NULL_HANDLE && !wait_for_completion; + bool is_submitted = false; + if (wait_for_submission) { + submit_task->is_submitted_ptr = &is_submitted; + } TimelineValue timeline = submit_task->timeline = submit_to_device ? ++timeline_value_ : timeline_value_ + 1; orphaned_data.timeline_ = timeline + 1; orphaned_data.move_data(context_discard_pool, timeline); + BLI_thread_queue_push(submitted_render_graphs_, submit_task); submit_task = nullptr; + if (wait_for_submission) { + while (!is_submitted) { + using namespace std::chrono_literals; + std::this_thread::sleep_for(1ns); + } + } + if (wait_for_completion) { wait_for_timeline(timeline); } @@ -89,6 +118,9 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data) Vector command_buffers_unused; TimelineResources command_buffers_in_use; VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE; + Vector unsubmitted_command_buffers; + Vector submit_infos; + submit_infos.reserve(2); std::optional command_buffer; while (device->lifetime < Lifetime::DEINITIALIZING) { @@ -98,6 +130,15 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data) continue; } + /* End current command buffer when we need to wait for a semaphore. In this case all previous + * recorded commands can run before the wait semaphores. The commands that must be guarded by + * the semaphores are part of the new submitted render graph. */ + if (submit_task->wait_semaphore != VK_NULL_HANDLE && command_buffer.has_value()) { + command_buffer->end_recording(); + unsubmitted_command_buffers.append(vk_command_buffer); + command_buffer.reset(); + } + if (!command_buffer.has_value()) { /* Check for completed command buffers that can be reused. */ if (command_buffers_unused.is_empty()) { @@ -138,30 +179,61 @@ void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data) command_builder.record_commands(render_graph, *command_buffer, node_handles); if (submit_task->submit_to_device) { + /* Create submit infos for previous command buffers. */ + submit_infos.clear(); + if (!unsubmitted_command_buffers.is_empty()) { + VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, + nullptr, + 0, + nullptr, + nullptr, + uint32_t(unsubmitted_command_buffers.size()), + unsubmitted_command_buffers.data(), + 0, + nullptr}; + submit_infos.append(vk_submit_info); + } + + /* Finalize current command buffer. */ command_buffer->end_recording(); + unsubmitted_command_buffers.append(vk_command_buffer); + + uint32_t wait_semaphore_len = submit_task->wait_semaphore == VK_NULL_HANDLE ? 0 : 1; + uint32_t signal_semaphore_len = submit_task->signal_semaphore == VK_NULL_HANDLE ? 1 : 2; + VkSemaphore signal_semaphores[2] = {device->vk_timeline_semaphore_, + submit_task->signal_semaphore}; + uint64_t signal_semaphore_values[2] = {submit_task->timeline, 0}; + VkTimelineSemaphoreSubmitInfo vk_timeline_semaphore_submit_info = { VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, nullptr, 0, nullptr, - 1, - &submit_task->timeline}; + signal_semaphore_len, + signal_semaphore_values}; VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO, &vk_timeline_semaphore_submit_info, - 0, - nullptr, - nullptr, + wait_semaphore_len, + &submit_task->wait_semaphore, + &submit_task->wait_dst_stage_mask, 1, - &vk_command_buffer, - 1, - &device->vk_timeline_semaphore_}; + &unsubmitted_command_buffers.last(), + signal_semaphore_len, + signal_semaphores}; + submit_infos.append(vk_submit_info); { std::scoped_lock lock_queue(*device->queue_mutex_); - vkQueueSubmit(device->vk_queue_, 1, &vk_submit_info, VK_NULL_HANDLE); + vkQueueSubmit(device->vk_queue_, submit_infos.size(), submit_infos.data(), VK_NULL_HANDLE); + } + if (submit_task->is_submitted_ptr != nullptr) { + *submit_task->is_submitted_ptr = true; } - command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer); vk_command_buffer = VK_NULL_HANDLE; + for (VkCommandBuffer vk_command_buffer : unsubmitted_command_buffers) { + command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer); + } + unsubmitted_command_buffers.clear(); command_buffer.reset(); }