diff --git a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_compute.cc b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_compute.cc index 597570d6f05..dfd985bfde3 100644 --- a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_compute.cc +++ b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_compute.cc @@ -28,7 +28,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_read_back) dispatch_info.dispatch_node.group_count_y = 1; dispatch_info.dispatch_node.group_count_z = 1; render_graph->add_node(dispatch_info); - render_graph->submit_for_read(); + submit(render_graph, command_buffer); EXPECT_EQ(3, log.size()); EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)", log[0]); @@ -75,7 +75,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_dispatch_read_back) dispatch_info.dispatch_node.group_count_z = 2; render_graph->add_node(dispatch_info); } - render_graph->submit_for_read(); + submit(render_graph, command_buffer); EXPECT_EQ(5, log.size()); EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)", log[0]); @@ -134,7 +134,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_dispatch_read_back_with_changing_descr dispatch_info.dispatch_node.group_count_z = 2; render_graph->add_node(dispatch_info); } - render_graph->submit_for_read(); + submit(render_graph, command_buffer); EXPECT_EQ(6, log.size()); EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)", log[0]); @@ -196,7 +196,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_dispatch_read_back_with_changing_pipel dispatch_info.dispatch_node.group_count_z = 2; render_graph->add_node(dispatch_info); } - render_graph->submit_for_read(); + submit(render_graph, command_buffer); EXPECT_EQ(6, log.size()); EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)", log[0]); @@ -259,7 +259,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_info.dispatch_node.group_count_z = 2; render_graph->add_node(dispatch_info); } - render_graph->submit_for_read(); + submit(render_graph, command_buffer); EXPECT_EQ(7, log.size()); EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)", log[0]); @@ -309,7 +309,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_indirect_read_back) dispatch_indirect_info.dispatch_indirect_node.buffer = command_buffer; dispatch_indirect_info.dispatch_indirect_node.offset = 0; render_graph->add_node(dispatch_indirect_info); - render_graph->submit_for_read(); + submit(render_graph, this->command_buffer); EXPECT_EQ(4, log.size()); EXPECT_EQ( "pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, " @@ -364,7 +364,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_indirect_dispatch_indirect_read_back) dispatch_indirect_info.dispatch_indirect_node.offset = 12; render_graph->add_node(dispatch_indirect_info); } - render_graph->submit_for_read(); + submit(render_graph, this->command_buffer); EXPECT_EQ(6, log.size()); EXPECT_EQ( diff --git a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_present.cc b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_present.cc index 3eb14b62749..53b6de8e8f5 100644 --- a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_present.cc +++ b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_present.cc @@ -15,8 +15,15 @@ TEST_F(VKRenderGraphTestPresent, transfer_and_present) VkHandle back_buffer(1u); 
resources.add_image(back_buffer, 1); + { + render_graph::VKSynchronizationNode::CreateInfo synchronization = {}; + synchronization.vk_image = back_buffer; + synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + render_graph->add_node(synchronization); + } - render_graph->submit_for_present(back_buffer); + submit(render_graph, command_buffer); EXPECT_EQ(1, log.size()); EXPECT_EQ( @@ -43,7 +50,15 @@ TEST_F(VKRenderGraphTestPresent, clear_and_present) clear_color_image.vk_image = back_buffer; render_graph->add_node(clear_color_image); - render_graph->submit_for_present(back_buffer); + { + render_graph::VKSynchronizationNode::CreateInfo synchronization = {}; + synchronization.vk_image = back_buffer; + synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + render_graph->add_node(synchronization); + } + + submit(render_graph, command_buffer); EXPECT_EQ(3, log.size()); diff --git a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_render.cc b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_render.cc index 19118f41d9a..ab67b6cf489 100644 --- a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_render.cc +++ b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_render.cc @@ -69,7 +69,7 @@ TEST_P(VKRenderGraphTestRender, begin_clear_attachments_end_read_back) render_graph->add_node(copy_image_to_buffer); } - render_graph->submit_for_read(); + submit(render_graph, command_buffer); EXPECT_EQ(6, log.size()); EXPECT_EQ( @@ -185,7 +185,7 @@ TEST_P(VKRenderGraphTestRender, begin_draw_end) render_graph->add_node(end_rendering); } - render_graph->submit(); + submit(render_graph, command_buffer); EXPECT_EQ(5, log.size()); EXPECT_EQ( "pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, " @@ -267,7 +267,7 @@ TEST_P(VKRenderGraphTestRender, begin_draw_end__layered) render_graph->add_node(end_rendering); } - render_graph->submit(); + submit(render_graph, command_buffer); EXPECT_EQ(7, log.size()); EXPECT_EQ( "pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, " diff --git a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_scheduler.cc b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_scheduler.cc index 9dea535d63f..957de73238f 100644 --- a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_scheduler.cc +++ b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_scheduler.cc @@ -54,7 +54,15 @@ TEST_P(VKRenderGraphTestScheduler, begin_rendering_copy_buffer_end_rendering) render_graph->add_node(end_rendering); } - render_graph->submit_for_present(image); + { + render_graph::VKSynchronizationNode::CreateInfo synchronization = {}; + synchronization.vk_image = image; + synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + render_graph->add_node(synchronization); + } + + submit(render_graph, command_buffer); EXPECT_EQ(6, log.size()); EXPECT_EQ( @@ -169,7 +177,15 @@ TEST_P(VKRenderGraphTestScheduler, begin_clear_attachments_copy_buffer_end) render_graph->add_node(end_rendering); } - render_graph->submit_for_present(image); + { + render_graph::VKSynchronizationNode::CreateInfo synchronization = {}; + synchronization.vk_image = image; + synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + synchronization.vk_image_aspect = 
VK_IMAGE_ASPECT_COLOR_BIT; + render_graph->add_node(synchronization); + } + + submit(render_graph, command_buffer); EXPECT_EQ(7, log.size()); EXPECT_EQ( "pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, " @@ -290,7 +306,15 @@ TEST_P(VKRenderGraphTestScheduler, begin_copy_buffer_clear_attachments_end) render_graph->add_node(end_rendering); } - render_graph->submit_for_present(image); + { + render_graph::VKSynchronizationNode::CreateInfo synchronization = {}; + synchronization.vk_image = image; + synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + render_graph->add_node(synchronization); + } + + submit(render_graph, command_buffer); EXPECT_EQ(7, log.size()); EXPECT_EQ( "pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, " @@ -427,7 +451,15 @@ TEST_P(VKRenderGraphTestScheduler, begin_clear_attachments_copy_buffer_clear_att render_graph->add_node(end_rendering); } - render_graph->submit_for_present(image); + { + render_graph::VKSynchronizationNode::CreateInfo synchronization = {}; + synchronization.vk_image = image; + synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + render_graph->add_node(synchronization); + } + + submit(render_graph, command_buffer); ASSERT_EQ(8, log.size()); EXPECT_EQ( @@ -585,7 +617,7 @@ TEST_P(VKRenderGraphTestScheduler, begin_draw_copy_framebuffer_draw_end) render_graph->add_node(end_rendering); } - render_graph->submit(); + submit(render_graph, command_buffer); ASSERT_EQ(12, log.size()); EXPECT_EQ( @@ -800,7 +832,7 @@ TEST_P(VKRenderGraphTestScheduler, begin_update_draw_update_draw_update_draw_end render_graph->add_node(end_rendering); } - render_graph->submit(); + submit(render_graph, command_buffer); ASSERT_EQ(17, log.size()); EXPECT_EQ("update_buffer(dst_buffer=0x1, dst_offset=0, data_size=16)", log[0]); EXPECT_EQ("update_buffer(dst_buffer=0x2, dst_offset=0, data_size=24)", log[1]); @@ -997,7 +1029,7 @@ TEST_P(VKRenderGraphTestScheduler, begin_draw_copy_to_attachment_draw_end) render_graph->add_node(end_rendering); } - render_graph->submit(); + submit(render_graph, command_buffer); ASSERT_EQ(11, log.size()); EXPECT_EQ( "pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, " diff --git a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_transfer.cc b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_transfer.cc index 1570e696899..110ea398712 100644 --- a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_transfer.cc +++ b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_transfer.cc @@ -20,7 +20,7 @@ TEST_F(VKRenderGraphTestTransfer, fill_and_read_back) resources.add_buffer(buffer); VKFillBufferNode::CreateInfo fill_buffer = {buffer, 1024, 42}; render_graph->add_node(fill_buffer); - render_graph->submit_for_read(); + submit(render_graph, command_buffer); EXPECT_EQ(1, log.size()); EXPECT_EQ("fill_buffer(dst_buffer=0x1, dst_offset=0, size=1024, data=42)", log[0]); @@ -47,7 +47,7 @@ TEST_F(VKRenderGraphTestTransfer, fill_transfer_and_read_back) copy_buffer.region.size = 1024; render_graph->add_node(copy_buffer); - render_graph->submit_for_read(); + submit(render_graph, command_buffer); EXPECT_EQ(3, log.size()); EXPECT_EQ("fill_buffer(dst_buffer=0x1, dst_offset=0, size=1024, data=42)", log[0]); @@ -79,7 +79,7 @@ TEST_F(VKRenderGraphTestTransfer, fill_fill_read_back) render_graph->add_node(fill_buffer_1); 
   VKFillBufferNode::CreateInfo fill_buffer_2 = {buffer, 1024, 42};
   render_graph->add_node(fill_buffer_2);
-  render_graph->submit_for_read();
+  submit(render_graph, command_buffer);
 
   EXPECT_EQ(3, log.size());
   EXPECT_EQ("fill_buffer(dst_buffer=0x1, dst_offset=0, size=1024, data=0)", log[0]);
@@ -142,7 +142,7 @@ TEST_F(VKRenderGraphTestTransfer, clear_clear_copy_and_read_back)
   render_graph->add_node(clear_color_image_dst);
   render_graph->add_node(copy_image);
   render_graph->add_node(copy_dst_image_to_buffer);
-  render_graph->submit_for_read();
+  submit(render_graph, command_buffer);
 
   EXPECT_EQ(8, log.size());
   EXPECT_EQ(
@@ -268,7 +268,7 @@ TEST_F(VKRenderGraphTestTransfer, clear_blit_copy_and_read_back)
   VKBlitImageNode::CreateInfo blit_image = {src_image, dst_image, vk_image_blit, VK_FILTER_LINEAR};
   render_graph->add_node(blit_image);
   render_graph->add_node(copy_dst_image_to_buffer);
-  render_graph->submit_for_read();
+  submit(render_graph, command_buffer);
 
   EXPECT_EQ(6, log.size());
   EXPECT_EQ(
diff --git a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_types.hh b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_types.hh
index aaea4fd7bf7..cf0022fc17e 100644
--- a/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_types.hh
+++ b/source/blender/gpu/vulkan/render_graph/tests/vk_render_graph_test_types.hh
@@ -52,19 +52,6 @@ class CommandBufferLog : public VKCommandBufferInterface {
     is_recording_ = false;
   }
 
-  void submit_with_cpu_synchronization(VkFence /*vk_fence*/) override
-  {
-    EXPECT_FALSE(is_recording_);
-    EXPECT_FALSE(is_cpu_synchronizing_);
-    is_cpu_synchronizing_ = true;
-  };
-  void wait_for_cpu_synchronization(VkFence /*vk_fence*/) override
-  {
-    EXPECT_FALSE(is_recording_);
-    EXPECT_TRUE(is_cpu_synchronizing_);
-    is_cpu_synchronizing_ = false;
-  };
-
   void bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) override
   {
     EXPECT_TRUE(is_recording_);
@@ -481,16 +468,16 @@ class VKRenderGraphTest : public ::testing::Test {
   {
     resources.use_dynamic_rendering = use_dynamic_rendering;
     resources.use_dynamic_rendering_local_read = use_dynamic_rendering_local_read;
-    render_graph = std::make_unique<VKRenderGraph>(
-        std::make_unique<CommandBufferLog>(
-            log, use_dynamic_rendering, use_dynamic_rendering_local_read),
-        resources);
+    render_graph = std::make_unique<VKRenderGraph>(resources);
+    command_buffer = std::make_unique<CommandBufferLog>(
+        log, use_dynamic_rendering, use_dynamic_rendering_local_read);
   }
 
  protected:
   Vector<std::string> log;
   VKResourceStateTracker resources;
   std::unique_ptr<VKRenderGraph> render_graph;
+  std::unique_ptr<CommandBufferLog> command_buffer;
   bool use_dynamic_rendering = true;
   bool use_dynamic_rendering_local_read = true;
 };
@@ -503,10 +490,9 @@ class VKRenderGraphTest_P : public ::testing::TestWithParam(GetParam());
     resources.use_dynamic_rendering = use_dynamic_rendering;
     resources.use_dynamic_rendering_local_read = use_dynamic_rendering_local_read;
-    render_graph = std::make_unique<VKRenderGraph>(
-        std::make_unique<CommandBufferLog>(
-            log, use_dynamic_rendering, use_dynamic_rendering_local_read),
-        resources);
+    render_graph = std::make_unique<VKRenderGraph>(resources);
+    command_buffer = std::make_unique<CommandBufferLog>(
+        log, use_dynamic_rendering, use_dynamic_rendering_local_read);
   }
 
  protected:
@@ -524,6 +510,7 @@ class VKRenderGraphTest_P : public ::testing::TestWithParam
   Vector<std::string> log;
   VKResourceStateTracker resources;
   std::unique_ptr<VKRenderGraph> render_graph;
+  std::unique_ptr<CommandBufferLog> command_buffer;
   bool use_dynamic_rendering = true;
   bool use_dynamic_rendering_local_read = true;
 };
@@ -546,4 +533,18 @@ template<typename T> union VkHandle {
   }
 };
 
+static inline void submit(std::unique_ptr<VKRenderGraph> &render_graph,
+                          std::unique_ptr<CommandBufferLog> &command_buffer)
+{
+  VKScheduler scheduler;
+  VKCommandBuilder command_builder;
+  Span<NodeHandle> node_handles = scheduler.select_nodes(*render_graph);
+  command_builder.build_nodes(*render_graph, *command_buffer, node_handles);
+
+  command_buffer->begin_recording();
+  command_builder.record_commands(*render_graph, *command_buffer, node_handles);
+  command_buffer->end_recording();
+
+  render_graph->reset();
+}
 }  // namespace blender::gpu::render_graph
diff --git a/source/blender/gpu/vulkan/render_graph/vk_command_buffer_wrapper.cc b/source/blender/gpu/vulkan/render_graph/vk_command_buffer_wrapper.cc
index c775b11bbed..667d3639936 100644
--- a/source/blender/gpu/vulkan/render_graph/vk_command_buffer_wrapper.cc
+++ b/source/blender/gpu/vulkan/render_graph/vk_command_buffer_wrapper.cc
@@ -11,72 +11,22 @@
 #include "vk_device.hh"
 
 namespace blender::gpu::render_graph {
-VKCommandBufferWrapper::VKCommandBufferWrapper(const VKWorkarounds &workarounds)
+VKCommandBufferWrapper::VKCommandBufferWrapper(VkCommandBuffer vk_command_buffer,
+                                               const VKWorkarounds &workarounds)
+    : vk_command_buffer_(vk_command_buffer)
 {
-  vk_command_pool_create_info_ = {};
-  vk_command_pool_create_info_.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
-  vk_command_pool_create_info_.queueFamilyIndex = 0;
-
-  vk_command_buffer_allocate_info_ = {};
-  vk_command_buffer_allocate_info_.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
-  vk_command_buffer_allocate_info_.commandPool = VK_NULL_HANDLE;
-  vk_command_buffer_allocate_info_.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
-  vk_command_buffer_allocate_info_.commandBufferCount = 1;
-
-  vk_command_buffer_begin_info_ = {};
-  vk_command_buffer_begin_info_.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
-  vk_command_buffer_begin_info_.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
-
-  vk_fence_create_info_ = {};
-  vk_fence_create_info_.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-  vk_fence_create_info_.flags = VK_FENCE_CREATE_SIGNALED_BIT;
-
-  vk_submit_info_ = {};
-  vk_submit_info_.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
-  vk_submit_info_.waitSemaphoreCount = 0;
-  vk_submit_info_.pWaitSemaphores = nullptr;
-  vk_submit_info_.pWaitDstStageMask = nullptr;
-  vk_submit_info_.commandBufferCount = 1;
-  vk_submit_info_.pCommandBuffers = &vk_command_buffer_;
-  vk_submit_info_.signalSemaphoreCount = 0;
-  vk_submit_info_.pSignalSemaphores = nullptr;
-
   use_dynamic_rendering = !workarounds.dynamic_rendering;
   use_dynamic_rendering_local_read = !workarounds.dynamic_rendering_local_read;
 }
 
-VKCommandBufferWrapper::~VKCommandBufferWrapper()
-{
-  VKDevice &device = VKBackend::get().device;
-  device.free_command_pool_buffers(vk_command_pool_);
-  if (vk_command_pool_ != VK_NULL_HANDLE) {
-    vkDestroyCommandPool(device.vk_handle(), vk_command_pool_, nullptr);
-    vk_command_pool_ = VK_NULL_HANDLE;
-  }
-  if (vk_fence_ != VK_NULL_HANDLE) {
-    vkDestroyFence(device.vk_handle(), vk_fence_, nullptr);
-    vk_fence_ = VK_NULL_HANDLE;
-  }
-}
-
 void VKCommandBufferWrapper::begin_recording()
 {
-  VKDevice &device = VKBackend::get().device;
-  if (vk_command_pool_ == VK_NULL_HANDLE) {
-    vk_command_pool_create_info_.queueFamilyIndex = device.queue_family_get();
-    vkCreateCommandPool(
-        device.vk_handle(), &vk_command_pool_create_info_, nullptr, &vk_command_pool_);
-    vk_command_buffer_allocate_info_.commandPool = vk_command_pool_;
-    vk_command_pool_create_info_.queueFamilyIndex = 0;
-  }
-  if (vk_fence_ == VK_NULL_HANDLE) {
-    vkCreateFence(device.vk_handle(), &vk_fence_create_info_, nullptr,
&vk_fence_); - } - BLI_assert(vk_command_buffer_ == VK_NULL_HANDLE); - vkAllocateCommandBuffers( - device.vk_handle(), &vk_command_buffer_allocate_info_, &vk_command_buffer_); - - vkBeginCommandBuffer(vk_command_buffer_, &vk_command_buffer_begin_info_); + VkCommandBufferBeginInfo vk_command_buffer_begin_info = { + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + nullptr, + VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + nullptr}; + vkBeginCommandBuffer(vk_command_buffer_, &vk_command_buffer_begin_info); } void VKCommandBufferWrapper::end_recording() @@ -84,32 +34,6 @@ void VKCommandBufferWrapper::end_recording() vkEndCommandBuffer(vk_command_buffer_); } -void VKCommandBufferWrapper::submit_with_cpu_synchronization(VkFence vk_fence) -{ - if (vk_fence == VK_NULL_HANDLE) { - vk_fence = vk_fence_; - } - VKDevice &device = VKBackend::get().device; - vkResetFences(device.vk_handle(), 1, &vk_fence); - { - std::scoped_lock lock(device.queue_mutex_get()); - vkQueueSubmit(device.queue_get(), 1, &vk_submit_info_, vk_fence); - } - device.discard_pool_for_current_thread(true).discard_command_buffer(vk_command_buffer_, - vk_command_pool_); - vk_command_buffer_ = nullptr; -} - -void VKCommandBufferWrapper::wait_for_cpu_synchronization(VkFence vk_fence) -{ - if (vk_fence == VK_NULL_HANDLE) { - vk_fence = vk_fence_; - } - VKDevice &device = VKBackend::get().device; - while (vkWaitForFences(device.vk_handle(), 1, &vk_fence, true, UINT64_MAX) == VK_TIMEOUT) { - } -} - void VKCommandBufferWrapper::bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) { diff --git a/source/blender/gpu/vulkan/render_graph/vk_command_buffer_wrapper.hh b/source/blender/gpu/vulkan/render_graph/vk_command_buffer_wrapper.hh index 7ce601bdf2d..08713e67a93 100644 --- a/source/blender/gpu/vulkan/render_graph/vk_command_buffer_wrapper.hh +++ b/source/blender/gpu/vulkan/render_graph/vk_command_buffer_wrapper.hh @@ -25,8 +25,6 @@ class VKCommandBufferInterface { virtual void begin_recording() = 0; virtual void end_recording() = 0; - virtual void submit_with_cpu_synchronization(VkFence vk_fence = VK_NULL_HANDLE) = 0; - virtual void wait_for_cpu_synchronization(VkFence vk_fence = VK_NULL_HANDLE) = 0; virtual void bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) = 0; virtual void bind_descriptor_sets(VkPipelineBindPoint pipeline_bind_point, @@ -145,24 +143,13 @@ class VKCommandBufferInterface { class VKCommandBufferWrapper : public VKCommandBufferInterface { private: - VkCommandPoolCreateInfo vk_command_pool_create_info_; - VkCommandBufferAllocateInfo vk_command_buffer_allocate_info_; - VkCommandBufferBeginInfo vk_command_buffer_begin_info_; - VkFenceCreateInfo vk_fence_create_info_; - VkSubmitInfo vk_submit_info_; - - VkCommandPool vk_command_pool_ = VK_NULL_HANDLE; VkCommandBuffer vk_command_buffer_ = VK_NULL_HANDLE; - VkFence vk_fence_ = VK_NULL_HANDLE; public: - VKCommandBufferWrapper(const VKWorkarounds &workarounds); - virtual ~VKCommandBufferWrapper(); + VKCommandBufferWrapper(VkCommandBuffer vk_command_buffer, const VKWorkarounds &workarounds); void begin_recording() override; void end_recording() override; - void submit_with_cpu_synchronization(VkFence vk_fence) override; - void wait_for_cpu_synchronization(VkFence vk_fence) override; void bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) override; void bind_descriptor_sets(VkPipelineBindPoint pipeline_bind_point, diff --git a/source/blender/gpu/vulkan/render_graph/vk_command_builder.cc 
b/source/blender/gpu/vulkan/render_graph/vk_command_builder.cc
index 9e4c7e4b3ba..2424eb97887 100644
--- a/source/blender/gpu/vulkan/render_graph/vk_command_builder.cc
+++ b/source/blender/gpu/vulkan/render_graph/vk_command_builder.cc
@@ -26,10 +26,13 @@ void VKCommandBuilder::build_nodes(VKRenderGraph &render_graph,
   groups_init(render_graph, node_handles);
   groups_extract_barriers(
       render_graph, node_handles, command_buffer.use_dynamic_rendering_local_read);
+}
 
-  command_buffer.begin_recording();
+void VKCommandBuilder::record_commands(VKRenderGraph &render_graph,
+                                       VKCommandBufferInterface &command_buffer,
+                                       Span<NodeHandle> node_handles)
+{
   groups_build_commands(render_graph, command_buffer, node_handles);
-  command_buffer.end_recording();
 }
 
 void VKCommandBuilder::groups_init(const VKRenderGraph &render_graph,
@@ -71,8 +74,8 @@ void VKCommandBuilder::groups_extract_barriers(VKRenderGraph &render_graph,
   node_pre_barriers_.resize(node_handles.size());
 
   /* Keep track of the post barriers that needs to be added. The pre barriers will be stored
-   * directly in `barrier_list_` but may not mingle with the pre barriers. Most barriers are group
-   * pre barriers. */
+   * directly in `barrier_list_` but may not mingle with the pre barriers. Most barriers are
+   * group pre barriers. */
   Vector post_barriers;
   /* Keep track of the node pre barriers that needs to be added. The pre barriers will be stored
    * directly in `barrier_list_` but may not mingle with the group barriers. */
@@ -157,8 +160,8 @@ void VKCommandBuilder::groups_extract_barriers(VKRenderGraph &render_graph,
       barrier_list_.append(barrier);
     }
 
-    /* Resume layered tracking. Each layer that has an override will be transition back to the
-     * layer specific image layout. */
+    /* Resume layered tracking. Each layer that has an override will be transitioned back to
+     * the layer-specific image layout. */
     barrier = {};
     layered_tracker.resume(barrier, use_local_read);
     if (!barrier.is_empty()) {
diff --git a/source/blender/gpu/vulkan/render_graph/vk_command_builder.hh b/source/blender/gpu/vulkan/render_graph/vk_command_builder.hh
index 619ab884d4a..068b95a173f 100644
--- a/source/blender/gpu/vulkan/render_graph/vk_command_builder.hh
+++ b/source/blender/gpu/vulkan/render_graph/vk_command_builder.hh
@@ -177,21 +177,23 @@ class VKCommandBuilder {
 
  public:
   /**
-   * Build the commands of the nodes provided by the `node_handles` parameter. The commands are
-   * recorded into the given `command_buffer`.
-   *
-   * Pre-condition:
-   * - `command_buffer` must not be in initial state according to
-   *   https://docs.vulkan.org/spec/latest/chapters/cmdbuffers.html#commandbuffers-lifecycle
-   *
-   * Post-condition:
-   * - `command_buffer` will be in executable state according to
-   *   https://docs.vulkan.org/spec/latest/chapters/cmdbuffers.html#commandbuffers-lifecycle
+   * Build execution groups and barriers.
+   * This method should be called while the resources are locked.
    */
   void build_nodes(VKRenderGraph &render_graph,
                    VKCommandBufferInterface &command_buffer,
                    Span<NodeHandle> node_handles);
 
+  /**
+   * Record the commands of the nodes provided by the `node_handles` parameter. The commands are
+   * recorded into the given `command_buffer`.
+   *
+   * `build_nodes` needs to be called beforehand with exactly the same parameters.
+   */
+  void record_commands(VKRenderGraph &render_graph,
+                       VKCommandBufferInterface &command_buffer,
+                       Span<NodeHandle> node_handles);
+
  private:
   /**
    * Split the node_handles in logical groups.
diff --git a/source/blender/gpu/vulkan/render_graph/vk_render_graph.cc b/source/blender/gpu/vulkan/render_graph/vk_render_graph.cc index 47088774bf3..b03d30934ac 100644 --- a/source/blender/gpu/vulkan/render_graph/vk_render_graph.cc +++ b/source/blender/gpu/vulkan/render_graph/vk_render_graph.cc @@ -13,20 +13,13 @@ namespace blender::gpu::render_graph { -VKRenderGraph::VKRenderGraph(std::unique_ptr command_buffer, - VKResourceStateTracker &resources) - : command_buffer_(std::move(command_buffer)), resources_(resources) +VKRenderGraph::VKRenderGraph(VKResourceStateTracker &resources) : resources_(resources) { submission_id.reset(); } -void VKRenderGraph::remove_nodes(Span node_handles) +void VKRenderGraph::reset() { - UNUSED_VARS_NDEBUG(node_handles); - BLI_assert_msg(node_handles.size() == nodes_.size(), - "Currently only supporting removing all nodes. The VKScheduler doesn't walk the " - "nodes, and will use incorrect ordering when not all nodes are removed. This " - "needs to be fixed when implementing a better scheduler."); links_.clear(); for (VKRenderGraphNode &node : nodes_) { node.free_data(storage_); @@ -40,68 +33,6 @@ void VKRenderGraph::remove_nodes(Span node_handles) /** \} */ -/* -------------------------------------------------------------------- */ -/** \name Submit graph - * \{ */ - -void VKRenderGraph::submit_for_present(VkImage vk_swapchain_image) -{ - /* Needs to be executed at forehand as `add_node` also locks the mutex. */ - VKSynchronizationNode::CreateInfo synchronization = {}; - synchronization.vk_image = vk_swapchain_image; - synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; - synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - add_node(synchronization); - - std::scoped_lock lock(resources_.mutex); - Span node_handles = scheduler_.select_nodes(*this); - command_builder_.build_nodes(*this, *command_buffer_, node_handles); - /* TODO: To improve performance it could be better to return a semaphore. This semaphore can be - * passed in the swapchain to ensure GPU synchronization. This also require a second semaphore to - * pause drawing until the swapchain has completed its drawing phase. - * - * Currently using CPU synchronization for safety. */ - command_buffer_->submit_with_cpu_synchronization(); - submission_id.next(); - remove_nodes(node_handles); - command_buffer_->wait_for_cpu_synchronization(); -} - -void VKRenderGraph::submit_for_read() -{ - std::scoped_lock lock(resources_.mutex); - Span node_handles = scheduler_.select_nodes(*this); - command_builder_.build_nodes(*this, *command_buffer_, node_handles); - command_buffer_->submit_with_cpu_synchronization(); - submission_id.next(); - remove_nodes(node_handles); - command_buffer_->wait_for_cpu_synchronization(); -} - -void VKRenderGraph::submit() -{ - /* Using `VK_NULL_HANDLE` will select the default VkFence of the command buffer. 
*/ - submit_synchronization_event(VK_NULL_HANDLE); - wait_synchronization_event(VK_NULL_HANDLE); -} - -void VKRenderGraph::submit_synchronization_event(VkFence vk_fence) -{ - std::scoped_lock lock(resources_.mutex); - Span node_handles = scheduler_.select_nodes(*this); - command_builder_.build_nodes(*this, *command_buffer_, node_handles); - command_buffer_->submit_with_cpu_synchronization(vk_fence); - submission_id.next(); - remove_nodes(node_handles); -} - -void VKRenderGraph::wait_synchronization_event(VkFence vk_fence) -{ - command_buffer_->wait_for_cpu_synchronization(vk_fence); -} - -/** \} */ - /* -------------------------------------------------------------------- */ /** \name Debug * \{ */ diff --git a/source/blender/gpu/vulkan/render_graph/vk_render_graph.hh b/source/blender/gpu/vulkan/render_graph/vk_render_graph.hh index d3d2b5c4f6b..8aed4c514e0 100644 --- a/source/blender/gpu/vulkan/render_graph/vk_render_graph.hh +++ b/source/blender/gpu/vulkan/render_graph/vk_render_graph.hh @@ -75,22 +75,6 @@ class VKRenderGraph : public NonCopyable { /** Storage for large node datas to improve CPU cache pre-loading. */ VKRenderGraphStorage storage_; - /** Scheduler decides which nodes to select and in what order to execute them. */ - VKScheduler scheduler_; - /** - * Command builder generated the commands of the nodes and record them into the command buffer. - */ - VKCommandBuilder command_builder_; - - /** - * Command buffer sends the commands to the device (`VKCommandBufferWrapper`). - * - * To improve testability the command buffer can be replaced by an instance of - * `VKCommandBufferLog` this way test cases don't need to create a fully working context in order - * to test something render graph specific. - */ - std::unique_ptr command_buffer_; - /** * Not owning pointer to device resources. * @@ -117,11 +101,16 @@ class VKRenderGraph : public NonCopyable { /** Current stack of debug group names. */ Vector group_stack; - /** Has a node been added to the current stack? If not the group stack will be added to - * used_groups. */ + + /** + * Has a node been added to the current stack? If not the group stack will be added to + * used_groups. + */ bool group_used = false; + /** All used debug groups. */ Vector> used_groups; + /** * Map of a node_handle to an index of debug group in used_groups. * @@ -142,8 +131,7 @@ class VKRenderGraph : public NonCopyable { * To improve testability the command buffer and resources they work on are provided as a * parameter. */ - VKRenderGraph(std::unique_ptr command_buffer, - VKResourceStateTracker &resources); + VKRenderGraph(VKResourceStateTracker &resources); private: /** @@ -214,41 +202,9 @@ class VKRenderGraph : public NonCopyable { ADD_NODE(VKResetQueryPoolNode) ADD_NODE(VKUpdateBufferNode) ADD_NODE(VKUpdateMipmapsNode) + ADD_NODE(VKSynchronizationNode) #undef ADD_NODE - /** - * Submit partial graph to be able to read the expected result of the rendering commands - * affecting the given vk_buffer. This method is called from - * `GPU_texture/storagebuf/indexbuf/vertbuf/_read`. In vulkan the content of images cannot be - * read directly and always needs to be copied to a transfer buffer. - * - * After calling this function the mapped memory of the vk_buffer would contain the data of the - * buffer. - */ - void submit_for_read(); - - /** - * Submit partial graph to be able to present the expected result of the rendering commands - * affecting the given vk_swapchain_image. This method is called when performing a - * swap chain swap. 
- * - * Pre conditions: - * - `vk_swapchain_image` needs to be registered in VKResourceStateTracker. - * - * Post conditions: - * - `vk_swapchain_image` layout is transitioned to `VK_IMAGE_LAYOUT_SRC_PRESENT`. - */ - void submit_for_present(VkImage vk_swapchain_image); - - /** - * Submit full graph. - */ - void submit(); - - /** Submit render graph with CPU synchronization event. */ - void submit_synchronization_event(VkFence vk_fence); - /** Wait and reset for a CPU synchronization event. */ - void wait_synchronization_event(VkFence vk_fence); /** * Push a new debugging group to the stack with the given name. * @@ -289,8 +245,12 @@ class VKRenderGraph : public NonCopyable { void debug_print(NodeHandle node_handle) const; + /** + * Reset the render graph. + */ + void reset(); + private: - void remove_nodes(Span node_handles); }; } // namespace blender::gpu::render_graph diff --git a/source/blender/gpu/vulkan/render_graph/vk_scheduler.cc b/source/blender/gpu/vulkan/render_graph/vk_scheduler.cc index c79b2a0743b..3184e87951f 100644 --- a/source/blender/gpu/vulkan/render_graph/vk_scheduler.cc +++ b/source/blender/gpu/vulkan/render_graph/vk_scheduler.cc @@ -22,7 +22,6 @@ Span VKScheduler::select_nodes(const VKRenderGraph &render_graph) for (NodeHandle node_handle : render_graph.nodes_.index_range()) { result_.append(node_handle); } - reorder_nodes(render_graph); return result_; } diff --git a/source/blender/gpu/vulkan/vk_backend.cc b/source/blender/gpu/vulkan/vk_backend.cc index b29960a1b64..4db7690fc36 100644 --- a/source/blender/gpu/vulkan/vk_backend.cc +++ b/source/blender/gpu/vulkan/vk_backend.cc @@ -456,7 +456,7 @@ void VKBackend::compute_dispatch(int groups_x_len, int groups_y_len, int groups_ dispatch_info.dispatch_node.group_count_x = groups_x_len; dispatch_info.dispatch_node.group_count_y = groups_y_len; dispatch_info.dispatch_node.group_count_z = groups_z_len; - context.render_graph.add_node(dispatch_info); + context.render_graph().add_node(dispatch_info); } void VKBackend::compute_dispatch_indirect(StorageBuf *indirect_buf) @@ -469,7 +469,7 @@ void VKBackend::compute_dispatch_indirect(StorageBuf *indirect_buf) context.update_pipeline_data(dispatch_indirect_info.dispatch_indirect_node.pipeline_data); dispatch_indirect_info.dispatch_indirect_node.buffer = indirect_buffer.vk_handle(); dispatch_indirect_info.dispatch_indirect_node.offset = 0; - context.render_graph.add_node(dispatch_indirect_info); + context.render_graph().add_node(dispatch_indirect_info); } Context *VKBackend::context_alloc(void *ghost_window, void *ghost_context) @@ -484,7 +484,7 @@ Context *VKBackend::context_alloc(void *ghost_window, void *ghost_context) device.init(ghost_context); } - VKContext *context = new VKContext(ghost_window, ghost_context, device.resources); + VKContext *context = new VKContext(ghost_window, ghost_context); device.context_register(*context); GHOST_SetVulkanSwapBuffersCallbacks((GHOST_ContextHandle)ghost_context, VKContext::swap_buffers_pre_callback, @@ -564,23 +564,9 @@ void VKBackend::render_end() if (thread_data.rendering_depth == 0) { VKContext *context = VKContext::get(); if (context != nullptr) { - context->flush_render_graph(); + context->flush_render_graph(RenderGraphFlushFlags::RENEW_RENDER_GRAPH); } - - thread_data.resource_pool_next(); - VKResourcePool &resource_pool = thread_data.resource_pool_get(); - resource_pool.discard_pool.destroy_discarded_resources(device); - resource_pool.reset(); - } - } - - else if (!BLI_thread_is_main()) { - /* Foreground rendering using a 
worker/render thread. In this case we move the resources to the - * device discard list and it will be cleared by the main thread. */ - if (thread_data.rendering_depth == 0) { - VKResourcePool &resource_pool = thread_data.resource_pool_get(); - device.orphaned_data.move_data(resource_pool.discard_pool); - resource_pool.reset(); + device.orphaned_data.destroy_discarded_resources(device); } } } diff --git a/source/blender/gpu/vulkan/vk_batch.cc b/source/blender/gpu/vulkan/vk_batch.cc index affc1eb84bf..803f2bc8c2e 100644 --- a/source/blender/gpu/vulkan/vk_batch.cc +++ b/source/blender/gpu/vulkan/vk_batch.cc @@ -47,7 +47,7 @@ void VKBatch::draw(int vertex_first, int vertex_count, int instance_first, int i vao.bind(draw_indexed.node_data.vertex_buffers); context.update_pipeline_data(prim_type, vao, draw_indexed.node_data.pipeline_data); - context.render_graph.add_node(draw_indexed); + context.render_graph().add_node(draw_indexed); } else { render_graph::VKDrawNode::CreateInfo draw(resource_access_info); @@ -58,7 +58,7 @@ void VKBatch::draw(int vertex_first, int vertex_count, int instance_first, int i vao.bind(draw.node_data.vertex_buffers); context.update_pipeline_data(prim_type, vao, draw.node_data.pipeline_data); - context.render_graph.add_node(draw); + context.render_graph().add_node(draw); } } @@ -108,7 +108,7 @@ void VKBatch::multi_draw_indirect(const VkBuffer indirect_buffer, vao.bind(draw_indexed_indirect.node_data.vertex_buffers); context.update_pipeline_data(prim_type, vao, draw_indexed_indirect.node_data.pipeline_data); - context.render_graph.add_node(draw_indexed_indirect); + context.render_graph().add_node(draw_indexed_indirect); } else { render_graph::VKDrawIndirectNode::CreateInfo draw(resource_access_info); @@ -119,7 +119,7 @@ void VKBatch::multi_draw_indirect(const VkBuffer indirect_buffer, vao.bind(draw.node_data.vertex_buffers); context.update_pipeline_data(prim_type, vao, draw.node_data.pipeline_data); - context.render_graph.add_node(draw); + context.render_graph().add_node(draw); } } diff --git a/source/blender/gpu/vulkan/vk_buffer.cc b/source/blender/gpu/vulkan/vk_buffer.cc index 733a2975a83..37501dbbb75 100644 --- a/source/blender/gpu/vulkan/vk_buffer.cc +++ b/source/blender/gpu/vulkan/vk_buffer.cc @@ -90,7 +90,7 @@ void VKBuffer::update_render_graph(VKContext &context, void *data) const update_buffer.dst_buffer = vk_buffer_; update_buffer.data_size = size_in_bytes_; update_buffer.data = data; - context.render_graph.add_node(update_buffer); + context.render_graph().add_node(update_buffer); } void VKBuffer::flush() const @@ -106,7 +106,7 @@ void VKBuffer::clear(VKContext &context, uint32_t clear_value) fill_buffer.vk_buffer = vk_buffer_; fill_buffer.data = clear_value; fill_buffer.size = size_in_bytes_; - context.render_graph.add_node(fill_buffer); + context.render_graph().add_node(fill_buffer); } void VKBuffer::read(VKContext &context, void *data) const @@ -114,7 +114,9 @@ void VKBuffer::read(VKContext &context, void *data) const BLI_assert_msg(is_mapped(), "Cannot read a non-mapped buffer."); context.rendering_end(); context.descriptor_set_get().upload_descriptor_sets(); - context.render_graph.submit_for_read(); + context.flush_render_graph(RenderGraphFlushFlags::SUBMIT | + RenderGraphFlushFlags::WAIT_FOR_COMPLETION | + RenderGraphFlushFlags::RENEW_RENDER_GRAPH); memcpy(data, mapped_memory_, size_in_bytes_); } @@ -153,8 +155,7 @@ bool VKBuffer::free() unmap(); } - VKDevice &device = VKBackend::get().device; - 
device.discard_pool_for_current_thread().discard_buffer(vk_buffer_, allocation_); + VKDiscardPool::discard_pool_get().discard_buffer(vk_buffer_, allocation_); allocation_ = VK_NULL_HANDLE; vk_buffer_ = VK_NULL_HANDLE; diff --git a/source/blender/gpu/vulkan/vk_common.hh b/source/blender/gpu/vulkan/vk_common.hh index 1d5bb8dd911..80dbe0a34e8 100644 --- a/source/blender/gpu/vulkan/vk_common.hh +++ b/source/blender/gpu/vulkan/vk_common.hh @@ -25,6 +25,8 @@ namespace blender::gpu { +using TimelineValue = uint64_t; + /** * Based on the usage of an Image View a different image view type should be created. * diff --git a/source/blender/gpu/vulkan/vk_context.cc b/source/blender/gpu/vulkan/vk_context.cc index 3f17f3c2a77..090bf36ea9b 100644 --- a/source/blender/gpu/vulkan/vk_context.cc +++ b/source/blender/gpu/vulkan/vk_context.cc @@ -22,12 +22,7 @@ namespace blender::gpu { -VKContext::VKContext(void *ghost_window, - void *ghost_context, - render_graph::VKResourceStateTracker &resources) - : render_graph(std::make_unique( - VKBackend::get().device.workarounds_get()), - resources) +VKContext::VKContext(void *ghost_window, void *ghost_context) { ghost_window_ = ghost_window; ghost_context_ = ghost_context; @@ -58,7 +53,6 @@ VKContext::~VKContext() void VKContext::sync_backbuffer(bool cycle_resource_pool) { - VKDevice &device = VKBackend::get().device; if (ghost_window_) { GHOST_VulkanSwapChainData swap_chain_data = {}; GHOST_GetVulkanSwapChainFormat((GHOST_WindowHandle)ghost_window_, &swap_chain_data); @@ -67,9 +61,6 @@ void VKContext::sync_backbuffer(bool cycle_resource_pool) thread_data.resource_pool_next(); VKResourcePool &resource_pool = thread_data.resource_pool_get(); imm = &resource_pool.immediate; - resource_pool.discard_pool.destroy_discarded_resources(device); - resource_pool.reset(); - resource_pool.discard_pool.move_data(device.orphaned_data); } const bool reset_framebuffer = swap_chain_format_.format != @@ -106,11 +97,6 @@ void VKContext::sync_backbuffer(bool cycle_resource_pool) vk_extent_ = swap_chain_data.extent; } } -#if 0 - else (is_background) { - discard all orphaned data - } -#endif } void VKContext::activate() @@ -122,6 +108,14 @@ void VKContext::activate() VKThreadData &thread_data = device.current_thread_data(); thread_data_ = std::reference_wrapper(thread_data); + if (!render_graph_.has_value()) { + render_graph_ = std::reference_wrapper( + *device.render_graph_new()); + for (const StringRef &group : debug_stack) { + debug_group_begin(std::string(group).c_str(), 0); + } + } + imm = &thread_data.resource_pool_get().immediate; is_active_ = true; @@ -133,24 +127,26 @@ void VKContext::activate() void VKContext::deactivate() { - flush_render_graph(); + flush_render_graph(RenderGraphFlushFlags(0)); immDeactivate(); imm = nullptr; thread_data_.reset(); + is_active_ = false; } void VKContext::begin_frame() {} -void VKContext::end_frame() {} +void VKContext::end_frame() +{ + VKDevice &device = VKBackend::get().device; + device.orphaned_data.destroy_discarded_resources(device); +} void VKContext::flush() {} -void VKContext::flush_render_graph() +TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags) { - if (render_graph.is_empty()) { - return; - } if (has_active_framebuffer()) { VKFrameBuffer &framebuffer = *active_framebuffer_get(); if (framebuffer.is_rendering()) { @@ -158,7 +154,21 @@ void VKContext::flush_render_graph() } } descriptor_set_get().upload_descriptor_sets(); - render_graph.submit(); + VKDevice &device = VKBackend::get().device; + TimelineValue timeline 
= device.render_graph_submit(
+      &render_graph_.value().get(),
+      discard_pool,
+      bool(flags & RenderGraphFlushFlags::SUBMIT),
+      bool(flags & RenderGraphFlushFlags::WAIT_FOR_COMPLETION));
+  render_graph_.reset();
+  if (bool(flags & RenderGraphFlushFlags::RENEW_RENDER_GRAPH)) {
+    render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
+        *device.render_graph_new());
+    for (const StringRef &group : debug_stack) {
+      debug_group_begin(std::string(group).c_str(), 0);
+    }
+  }
+  return timeline;
 }
 
 void VKContext::finish() {}
 
@@ -356,10 +366,18 @@ void VKContext::swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &swap_c
   device.resources.add_image(swap_chain_data.image, 1, "SwapchainImage");
 
   framebuffer.rendering_end(*this);
+  render_graph::VKRenderGraph &render_graph = this->render_graph();
   render_graph.add_node(blit_image);
   GPU_debug_group_end();
 
   descriptor_set_get().upload_descriptor_sets();
-  render_graph.submit_for_present(swap_chain_data.image);
+  render_graph::VKSynchronizationNode::CreateInfo synchronization = {};
+  synchronization.vk_image = swap_chain_data.image;
+  synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
+  synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
+  render_graph.add_node(synchronization);
+  flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
+                     RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
+
   device.resources.remove_image(swap_chain_data.image);
 #if 0
   device.debug_print();
diff --git a/source/blender/gpu/vulkan/vk_context.hh b/source/blender/gpu/vulkan/vk_context.hh
index b31d11f2449..5f3ddf0792c 100644
--- a/source/blender/gpu/vulkan/vk_context.hh
+++ b/source/blender/gpu/vulkan/vk_context.hh
@@ -8,6 +8,8 @@
 
 #pragma once
 
+#include "BLI_utildefines.h"
+
 #include "gpu_context_private.hh"
 
 #include "GHOST_Types.h"
@@ -26,6 +28,14 @@ class VKStateManager;
 class VKShader;
 class VKThreadData;
 
+enum RenderGraphFlushFlags {
+  NONE = 0,
+  RENEW_RENDER_GRAPH = 1 << 0,
+  SUBMIT = 1 << 1,
+  WAIT_FOR_COMPLETION = 1 << 2,
+};
+ENUM_OPERATORS(RenderGraphFlushFlags, RenderGraphFlushFlags::WAIT_FOR_COMPLETION);
+
 class VKContext : public Context, NonCopyable {
  private:
   VkExtent2D vk_extent_ = {};
@@ -37,13 +47,21 @@ class VKContext : public Context, NonCopyable {
   render_graph::VKResourceAccessInfo access_info_ = {};
 
   std::optional<std::reference_wrapper<VKThreadData>> thread_data_;
+  std::optional<std::reference_wrapper<render_graph::VKRenderGraph>> render_graph_;
 
  public:
-  render_graph::VKRenderGraph render_graph;
+  VKDiscardPool discard_pool;
 
-  VKContext(void *ghost_window,
-            void *ghost_context,
-            render_graph::VKResourceStateTracker &resources);
+  const render_graph::VKRenderGraph &render_graph() const
+  {
+    return render_graph_.value().get();
+  }
+  render_graph::VKRenderGraph &render_graph()
+  {
+    return render_graph_.value().get();
+  }
+
+  VKContext(void *ghost_window, void *ghost_context);
   virtual ~VKContext();
 
   void activate() override;
@@ -52,7 +70,8 @@ class VKContext : public Context, NonCopyable {
   void end_frame() override;
 
   void flush() override;
-  void flush_render_graph();
+
+  TimelineValue flush_render_graph(RenderGraphFlushFlags flags);
   void finish() override;
 
   void memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb) override;
diff --git a/source/blender/gpu/vulkan/vk_debug.cc b/source/blender/gpu/vulkan/vk_debug.cc
index 8f95e1ad5fc..8e30eac71af 100644
--- a/source/blender/gpu/vulkan/vk_debug.cc
+++ b/source/blender/gpu/vulkan/vk_debug.cc
@@ -21,17 +21,18 @@ static CLG_LogRef LOG = {"gpu.vulkan"};
 namespace blender::gpu {
 void VKContext::debug_group_begin(const char *name, int)
 {
-
render_graph.debug_group_begin(name, debug::get_debug_group_color(name)); + render_graph().debug_group_begin(name, debug::get_debug_group_color(name)); } void VKContext::debug_group_end() { - render_graph.debug_group_end(); + render_graph().debug_group_end(); } bool VKContext::debug_capture_begin(const char *title) { - flush_render_graph(); + flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION | + RenderGraphFlushFlags::RENEW_RENDER_GRAPH); return VKBackend::get().debug_capture_begin(title); } @@ -51,7 +52,8 @@ bool VKBackend::debug_capture_begin(const char *title) void VKContext::debug_capture_end() { - flush_render_graph(); + flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION | + RenderGraphFlushFlags::RENEW_RENDER_GRAPH); VKBackend::get().debug_capture_end(); } diff --git a/source/blender/gpu/vulkan/vk_device.cc b/source/blender/gpu/vulkan/vk_device.cc index 4d8b581191b..e1cc0836527 100644 --- a/source/blender/gpu/vulkan/vk_device.cc +++ b/source/blender/gpu/vulkan/vk_device.cc @@ -37,6 +37,9 @@ void VKDevice::deinit() if (!is_initialized()) { return; } + lifetime = Lifetime::DEINITIALIZING; + + deinit_submission_pool(); dummy_buffer.free(); samplers_.free(); @@ -52,9 +55,15 @@ void VKDevice::deinit() pipelines.write_to_disk(); pipelines.free_data(); descriptor_set_layouts_.deinit(); + orphaned_data.deinit(*this); vmaDestroyAllocator(mem_allocator_); mem_allocator_ = VK_NULL_HANDLE; + while (!render_graphs_.is_empty()) { + render_graph::VKRenderGraph *render_graph = render_graphs_.pop_last(); + MEM_delete(render_graph); + } + debugging_tools_.deinit(vk_instance_); vk_instance_ = VK_NULL_HANDLE; @@ -64,11 +73,12 @@ void VKDevice::deinit() vk_queue_ = VK_NULL_HANDLE; vk_physical_device_properties_ = {}; glsl_patch_.clear(); + lifetime = Lifetime::DESTROYED; } bool VKDevice::is_initialized() const { - return vk_device_ != VK_NULL_HANDLE; + return lifetime == Lifetime::RUNNING; } void VKDevice::init(void *ghost_context) @@ -105,6 +115,10 @@ void VKDevice::init(void *ghost_context) resources.use_dynamic_rendering = !workarounds_.dynamic_rendering; resources.use_dynamic_rendering_local_read = !workarounds_.dynamic_rendering_local_read; + orphaned_data.timeline_ = timeline_value_ + 1; + + init_submission_pool(); + lifetime = Lifetime::RUNNING; } void VKDevice::init_functions() @@ -349,6 +363,214 @@ std::string VKDevice::driver_version() const /** \} */ +/* -------------------------------------------------------------------- */ +/** \name Render graph + * \{ */ + +struct VKRenderGraphSubmitTask { + render_graph::VKRenderGraph *render_graph; + uint64_t timeline; + bool submit_to_device; +}; + +TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_graph, + VKDiscardPool &context_discard_pool, + bool submit_to_device, + bool wait_for_completion) +{ + if (render_graph->is_empty()) { + render_graph->reset(); + BLI_thread_queue_push(unused_render_graphs_, render_graph); + return 0; + } + + VKRenderGraphSubmitTask *submit_task = MEM_new(__func__); + submit_task->render_graph = render_graph; + submit_task->submit_to_device = submit_to_device; + TimelineValue timeline = submit_task->timeline = submit_to_device ? 
++timeline_value_ : timeline_value_ + 1;
+  orphaned_data.timeline_ = timeline + 1;
+  orphaned_data.move_data(context_discard_pool, timeline);
+  BLI_thread_queue_push(submitted_render_graphs_, submit_task);
+  submit_task = nullptr;
+
+  if (wait_for_completion) {
+    wait_for_timeline(timeline);
+  }
+  return timeline;
+}
+
+void VKDevice::wait_for_timeline(TimelineValue timeline)
+{
+  if (timeline == 0) {
+    return;
+  }
+  VkSemaphoreWaitInfo vk_semaphore_wait_info = {
+      VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, nullptr, 0, 1, &vk_timeline_semaphore_, &timeline};
+  vkWaitSemaphores(vk_device_, &vk_semaphore_wait_info, UINT64_MAX);
+}
+
+render_graph::VKRenderGraph *VKDevice::render_graph_new()
+{
+  render_graph::VKRenderGraph *render_graph = static_cast<render_graph::VKRenderGraph *>(
+      BLI_thread_queue_pop_timeout(unused_render_graphs_, 0));
+  if (render_graph) {
+    return render_graph;
+  }
+
+  std::scoped_lock lock(resources.mutex);
+  render_graph = MEM_new<render_graph::VKRenderGraph>(__func__, resources);
+  render_graphs_.append(render_graph);
+  return render_graph;
+}
+
+void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
+{
+  UNUSED_VARS(task_data);
+
+  VKDevice *device = static_cast<VKDevice *>(BLI_task_pool_user_data(pool));
+  VkCommandPool vk_command_pool = VK_NULL_HANDLE;
+  VkCommandPoolCreateInfo vk_command_pool_create_info = {
+      VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+      nullptr,
+      VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+      device->vk_queue_family_};
+  vkCreateCommandPool(device->vk_device_, &vk_command_pool_create_info, nullptr, &vk_command_pool);
+
+  render_graph::VKScheduler scheduler;
+  render_graph::VKCommandBuilder command_builder;
+  Vector<VkCommandBuffer> command_buffers_unused;
+  TimelineResources<VkCommandBuffer> command_buffers_in_use;
+  VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE;
+  std::optional<render_graph::VKCommandBufferWrapper> command_buffer;
+
+  while (device->lifetime < Lifetime::DEINITIALIZING) {
+    VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
+        BLI_thread_queue_pop_timeout(device->submitted_render_graphs_, 1));
+    if (submit_task == nullptr) {
+      continue;
+    }
+
+    if (!command_buffer.has_value()) {
+      /* Check for completed command buffers that can be reused. */
+      if (command_buffers_unused.is_empty()) {
+        uint64_t current_timeline = device->submission_finished_timeline_get();
+        command_buffers_in_use.remove_old(current_timeline,
+                                          [&](VkCommandBuffer vk_command_buffer) {
+                                            command_buffers_unused.append(vk_command_buffer);
+                                          });
+      }
+
+      /* Create new command buffers when there are none left to be reused. */
+      if (command_buffers_unused.is_empty()) {
+        command_buffers_unused.resize(10, VK_NULL_HANDLE);
+        VkCommandBufferAllocateInfo vk_command_buffer_allocate_info = {
+            VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+            nullptr,
+            vk_command_pool,
+            VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+            10};
+        vkAllocateCommandBuffers(
+            device->vk_device_, &vk_command_buffer_allocate_info, command_buffers_unused.data());
+      }
+
+      vk_command_buffer = command_buffers_unused.pop_last();
+      command_buffer = std::make_optional<render_graph::VKCommandBufferWrapper>(
+          vk_command_buffer, device->workarounds_);
+      command_buffer->begin_recording();
+    }
+
+    BLI_assert(vk_command_buffer != VK_NULL_HANDLE);
+
+    render_graph::VKRenderGraph &render_graph = *submit_task->render_graph;
+    Span<render_graph::NodeHandle> node_handles;
+    {
+      std::scoped_lock lock_resources(device->resources.mutex);
+      node_handles = scheduler.select_nodes(render_graph);
+      command_builder.build_nodes(render_graph, *command_buffer, node_handles);
+    }
+    command_builder.record_commands(render_graph, *command_buffer, node_handles);
+
+    if (submit_task->submit_to_device) {
+      command_buffer->end_recording();
+      VkTimelineSemaphoreSubmitInfo vk_timeline_semaphore_submit_info = {
+          VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+          nullptr,
+          0,
+          nullptr,
+          1,
+          &submit_task->timeline};
+      VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
+                                     &vk_timeline_semaphore_submit_info,
+                                     0,
+                                     nullptr,
+                                     nullptr,
+                                     1,
+                                     &vk_command_buffer,
+                                     1,
+                                     &device->vk_timeline_semaphore_};
+
+      {
+        std::scoped_lock lock_queue(*device->queue_mutex_);
+        vkQueueSubmit(device->vk_queue_, 1, &vk_submit_info, VK_NULL_HANDLE);
+      }
+      command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer);
+      vk_command_buffer = VK_NULL_HANDLE;
+      command_buffer.reset();
+    }
+
+    render_graph.reset();
+    BLI_thread_queue_push(device->unused_render_graphs_, std::move(submit_task->render_graph));
+    MEM_delete(submit_task);
+  }
+
+  /* Clear command buffers and pool. */
+  vkDeviceWaitIdle(device->vk_device_);
+  command_buffers_in_use.remove_old(UINT64_MAX, [&](VkCommandBuffer vk_command_buffer) {
+    command_buffers_unused.append(vk_command_buffer);
+  });
+  vkFreeCommandBuffers(device->vk_device_,
+                       vk_command_pool,
+                       command_buffers_unused.size(),
+                       command_buffers_unused.data());
+  vkDestroyCommandPool(device->vk_device_, vk_command_pool, nullptr);
+}
+
+void VKDevice::init_submission_pool()
+{
+  submission_pool_ = BLI_task_pool_create_background_serial(this, TASK_PRIORITY_HIGH);
+  BLI_task_pool_push(submission_pool_, VKDevice::submission_runner, nullptr, false, nullptr);
+  submitted_render_graphs_ = BLI_thread_queue_init();
+  unused_render_graphs_ = BLI_thread_queue_init();
+
+  VkSemaphoreTypeCreateInfo vk_semaphore_type_create_info = {
+      VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_TIMELINE, 0};
+  VkSemaphoreCreateInfo vk_semaphore_create_info = {
+      VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &vk_semaphore_type_create_info, 0};
+  vkCreateSemaphore(vk_device_, &vk_semaphore_create_info, nullptr, &vk_timeline_semaphore_);
+}
+
+void VKDevice::deinit_submission_pool()
+{
+  BLI_task_pool_free(submission_pool_);
+  submission_pool_ = nullptr;
+
+  while (!BLI_thread_queue_is_empty(submitted_render_graphs_)) {
+    VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
+        BLI_thread_queue_pop(submitted_render_graphs_));
+    MEM_delete(submit_task);
+  }
+  BLI_thread_queue_free(submitted_render_graphs_);
+  submitted_render_graphs_ = nullptr;
+  BLI_thread_queue_free(unused_render_graphs_);
+
unused_render_graphs_ = nullptr; + + vkDestroySemaphore(vk_device_, vk_timeline_semaphore_, nullptr); + vk_timeline_semaphore_ = VK_NULL_HANDLE; +} + +/** \} */ + /* -------------------------------------------------------------------- */ /** \name VKThreadData * \{ */ @@ -389,6 +611,7 @@ VKThreadData &VKDevice::current_thread_data() return *thread_data; } +#if 0 VKDiscardPool &VKDevice::discard_pool_for_current_thread(bool thread_safe) { std::unique_lock lock(resources.mutex, std::defer_lock); @@ -406,6 +629,7 @@ VKDiscardPool &VKDevice::discard_pool_for_current_thread(bool thread_safe) return orphaned_data; } +#endif void VKDevice::context_register(VKContext &context) { @@ -414,6 +638,7 @@ void VKDevice::context_register(VKContext &context) void VKDevice::context_unregister(VKContext &context) { + orphaned_data.move_data(context.discard_pool, timeline_value_ + 1); contexts_.remove(contexts_.first_index_of(std::reference_wrapper(context))); } Span> VKDevice::contexts_get() const @@ -498,29 +723,16 @@ void VKDevice::debug_print() os << "ThreadData" << (is_main ? " (main-thread)" : "") << ")\n"; os << " Rendering_depth: " << thread_data->rendering_depth << "\n"; for (int resource_pool_index : IndexRange(thread_data->resource_pools.size())) { - const VKResourcePool &resource_pool = thread_data->resource_pools[resource_pool_index]; const bool is_active = thread_data->resource_pool_index == resource_pool_index; os << " Resource Pool (index=" << resource_pool_index << (is_active ? " active" : "") << ")\n"; - debug_print(os, resource_pool.discard_pool); } } - os << "Orphaned data\n"; + os << "Discard pool\n"; debug_print(os, orphaned_data); os << "\n"; } -void VKDevice::free_command_pool_buffers(VkCommandPool vk_command_pool) -{ - std::scoped_lock mutex(resources.mutex); - for (VKThreadData *thread_data : thread_data_) { - for (VKResourcePool &resource_pool : thread_data->resource_pools) { - resource_pool.discard_pool.free_command_pool_buffers(vk_command_pool, *this); - } - } - orphaned_data.free_command_pool_buffers(vk_command_pool, *this); -} - /** \} */ } // namespace blender::gpu diff --git a/source/blender/gpu/vulkan/vk_device.hh b/source/blender/gpu/vulkan/vk_device.hh index b414aaea40f..25ffa118d9e 100644 --- a/source/blender/gpu/vulkan/vk_device.hh +++ b/source/blender/gpu/vulkan/vk_device.hh @@ -8,6 +8,10 @@ #pragma once +#include + +#include "BLI_task.h" +#include "BLI_threads.h" #include "BLI_utility_mixins.hh" #include "BLI_vector.hh" @@ -149,6 +153,35 @@ class VKDevice : public NonCopyable { VkQueue vk_queue_ = VK_NULL_HANDLE; std::mutex *queue_mutex_ = nullptr; + /** + * Lifetime of the device. + * + * Used for deinitialization of the command builder thread. + */ + enum Lifetime { + UNINITIALIZED, + RUNNING, + DEINITIALIZING, + DESTROYED, + }; + Lifetime lifetime = Lifetime::UNINITIALIZED; + /** + * Task pool for render graph submission. + * + * Multiple threads in Blender can build a render graph. Building the command buffer for a render + * graph is faster when doing it in serial. Submission pool ensures that only one task is + * building at a time (background_serial). + */ + TaskPool *submission_pool_ = nullptr; + /** + * All created render graphs. 
+   */
+  Vector<render_graph::VKRenderGraph *> render_graphs_;
+  ThreadQueue *submitted_render_graphs_ = nullptr;
+  ThreadQueue *unused_render_graphs_ = nullptr;
+  VkSemaphore vk_timeline_semaphore_ = VK_NULL_HANDLE;
+  std::atomic<TimelineValue> timeline_value_ = 0;
+
   VKSamplers samplers_;
   VKDescriptorSetLayouts descriptor_set_layouts_;
 
@@ -313,6 +346,31 @@ class VKDevice : public NonCopyable {
   const char *glsl_patch_get() const;
   void init_glsl_patch();
 
+  /* -------------------------------------------------------------------- */
+  /** \name Render graph
+   * \{ */
+
+  static void submission_runner(TaskPool *__restrict pool, void *task_data);
+  render_graph::VKRenderGraph *render_graph_new();
+
+  TimelineValue render_graph_submit(render_graph::VKRenderGraph *render_graph,
+                                    VKDiscardPool &context_discard_pool,
+                                    bool submit_to_device,
+                                    bool wait_for_completion);
+  void wait_for_timeline(TimelineValue timeline);
+
+  /**
+   * Retrieve the last finished submission timeline.
+   */
+  TimelineValue submission_finished_timeline_get() const
+  {
+    BLI_assert(vk_timeline_semaphore_ != VK_NULL_HANDLE);
+    TimelineValue current_timeline;
+    vkGetSemaphoreCounterValue(vk_device_, vk_timeline_semaphore_, &current_timeline);
+    return current_timeline;
+  }
+
+  /** \} */
+
   /* -------------------------------------------------------------------- */
   /** \name Resource management
    * \{ */
@@ -322,6 +380,7 @@ class VKDevice : public NonCopyable {
    */
   VKThreadData &current_thread_data();
 
+#if 0
   /**
   * Get the discard pool for the current thread.
   *
@@ -337,6 +396,7 @@ class VKDevice : public NonCopyable {
   * function without trying to reacquire resources mutex making a deadlock.
   */
   VKDiscardPool &discard_pool_for_current_thread(bool thread_safe = false);
+#endif
 
   void context_register(VKContext &context);
   void context_unregister(VKContext &context);
@@ -346,8 +406,6 @@ class VKDevice : public NonCopyable {
   static void debug_print(std::ostream &os, const VKDiscardPool &discard_pool);
   void debug_print();
 
-  void free_command_pool_buffers(VkCommandPool vk_command_pool);
-
   /** \} */
 
  private:
@@ -357,6 +415,8 @@ class VKDevice : public NonCopyable {
   void init_physical_device_extensions();
   void init_debug_callbacks();
   void init_memory_allocator();
+  void init_submission_pool();
+  void deinit_submission_pool();
 
   /**
   * Initialize the functions struct with extension specific function pointer.
   */
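Taken together with submission_runner() above, the header now exposes a submit/wait pair keyed by timeline values rather than fences. A minimal usage sketch (names taken from the declarations above; `device`, `render_graph` and `context_discard_pool` are placeholders, not code from the patch):

/* Illustrative sketch, not part of the patch. */
TimelineValue timeline = device.render_graph_submit(
    render_graph, context_discard_pool, /*submit_to_device=*/true, /*wait_for_completion=*/false);
/* ... keep recording into a fresh render graph ... */
device.wait_for_timeline(timeline); /* Blocks until the GPU has executed this submission. */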
diff --git a/source/blender/gpu/vulkan/vk_fence.cc b/source/blender/gpu/vulkan/vk_fence.cc
index a343c461c5a..61c6f35b6d2 100644
--- a/source/blender/gpu/vulkan/vk_fence.cc
+++ b/source/blender/gpu/vulkan/vk_fence.cc
@@ -13,39 +13,18 @@
 
 namespace blender::gpu {
 
-VKFence::~VKFence()
-{
-  if (vk_fence_ != VK_NULL_HANDLE) {
-    VKDevice &device = VKBackend::get().device;
-    vkDestroyFence(device.vk_handle(), vk_fence_, nullptr);
-    vk_fence_ = VK_NULL_HANDLE;
-  }
-}
-
 void VKFence::signal()
 {
-  if (vk_fence_ == VK_NULL_HANDLE) {
-    VKDevice &device = VKBackend::get().device;
-    VkFenceCreateInfo vk_fence_create_info = {};
-    vk_fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
-    vk_fence_create_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
-    vkCreateFence(device.vk_handle(), &vk_fence_create_info, nullptr, &vk_fence_);
-  }
   VKContext &context = *VKContext::get();
-  context.rendering_end();
-  context.descriptor_set_get().upload_descriptor_sets();
-  context.render_graph.submit_synchronization_event(vk_fence_);
-  signalled_ = true;
+  timeline_value_ = context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
+                                               RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
 }
 
 void VKFence::wait()
 {
-  if (!signalled_) {
-    return;
-  }
-  VKContext &context = *VKContext::get();
-  context.render_graph.wait_synchronization_event(vk_fence_);
-  signalled_ = false;
+  VKDevice &device = VKBackend::get().device;
+  device.wait_for_timeline(timeline_value_);
+  timeline_value_ = 0;
 }
 
 }  // namespace blender::gpu
diff --git a/source/blender/gpu/vulkan/vk_fence.hh b/source/blender/gpu/vulkan/vk_fence.hh
index 5c48d795eeb..158bd782c42 100644
--- a/source/blender/gpu/vulkan/vk_fence.hh
+++ b/source/blender/gpu/vulkan/vk_fence.hh
@@ -16,11 +16,7 @@ namespace blender::gpu {
 
 class VKFence : public Fence {
  private:
-  VkFence vk_fence_ = VK_NULL_HANDLE;
-  bool signalled_ = false;
-
- protected:
-  virtual ~VKFence();
+  TimelineValue timeline_value_;
 
  public:
   void signal() override;
diff --git a/source/blender/gpu/vulkan/vk_framebuffer.cc b/source/blender/gpu/vulkan/vk_framebuffer.cc
index 05386bda35e..946022332ac 100644
--- a/source/blender/gpu/vulkan/vk_framebuffer.cc
+++ b/source/blender/gpu/vulkan/vk_framebuffer.cc
@@ -47,13 +47,13 @@ VKFrameBuffer::~VKFrameBuffer()
 
 void VKFrameBuffer::render_pass_free()
 {
-  VKDevice &device = VKBackend::get().device;
+  VKDiscardPool &discard_pool = VKDiscardPool::discard_pool_get();
   if (vk_framebuffer != VK_NULL_HANDLE) {
-    device.discard_pool_for_current_thread().discard_framebuffer(vk_framebuffer);
+    discard_pool.discard_framebuffer(vk_framebuffer);
     vk_framebuffer = VK_NULL_HANDLE;
   }
   if (vk_render_pass != VK_NULL_HANDLE) {
-    device.discard_pool_for_current_thread().discard_render_pass(vk_render_pass);
+    discard_pool.discard_render_pass(vk_render_pass);
     vk_render_pass = VK_NULL_HANDLE;
   }
 }
@@ -202,7 +202,7 @@ void VKFrameBuffer::clear(render_graph::VKClearAttachmentsNode::CreateInfo &clea
 {
   VKContext &context = *VKContext::get();
   rendering_ensure(context);
-  context.render_graph.add_node(clear_attachments);
+  context.render_graph().add_node(clear_attachments);
 }
 
 void VKFrameBuffer::clear(const eGPUFrameBufferBits buffers,
@@ -475,7 +475,7 @@ static void blit_aspect(VKContext &context,
                           dst_texture.height_get());
   region.dstOffsets[1].z = 1;
 
-  context.render_graph.add_node(blit_image);
+  context.render_graph().add_node(blit_image);
 }
 
 void VKFrameBuffer::blit_to(eGPUFrameBufferBits planes,
@@ -780,7 +780,7 @@ void VKFrameBuffer::rendering_ensure_render_pass(VKContext &context)
   begin_info.framebuffer = vk_framebuffer;
   render_area_update(begin_info.renderArea);
 
-  context.render_graph.add_node(begin_rendering);
+  context.render_graph().add_node(begin_rendering);
 
   /* Load store operations are not supported inside a render pass.
    * It requires duplicating render passes and frame-buffers to support suspend/resume rendering.
@@ -813,7 +813,7 @@ void VKFrameBuffer::rendering_ensure_render_pass(VKContext &context)
       render_area_update(clear_attachments.vk_clear_rect.rect);
       clear_attachments.vk_clear_rect.baseArrayLayer = 0;
       clear_attachments.vk_clear_rect.layerCount = 1;
-      context.render_graph.add_node(clear_attachments);
+      context.render_graph().add_node(clear_attachments);
     }
   }
 }
@@ -970,7 +970,7 @@ void VKFrameBuffer::rendering_ensure_dynamic_rendering(VKContext &context,
       break;
   }
 
-  context.render_graph.add_node(begin_rendering);
+  context.render_graph().add_node(begin_rendering);
 }
 
 void VKFrameBuffer::rendering_ensure(VKContext &context)
@@ -1030,7 +1030,7 @@ void VKFrameBuffer::rendering_end(VKContext &context)
       BLI_assert(vk_render_pass);
       end_rendering.vk_render_pass = vk_render_pass;
     }
-    context.render_graph.add_node(end_rendering);
+    context.render_graph().add_node(end_rendering);
     is_rendering_ = false;
   }
 }
diff --git a/source/blender/gpu/vulkan/vk_image_view.cc b/source/blender/gpu/vulkan/vk_image_view.cc
index 47405125ee0..b8ea0de5849 100644
--- a/source/blender/gpu/vulkan/vk_image_view.cc
+++ b/source/blender/gpu/vulkan/vk_image_view.cc
@@ -73,8 +73,7 @@ VKImageView::VKImageView(VKImageView &&other) : info(other.info)
 VKImageView::~VKImageView()
 {
   if (vk_image_view_ != VK_NULL_HANDLE) {
-    VKDevice &device = VKBackend::get().device;
-    device.discard_pool_for_current_thread().discard_image_view(vk_image_view_);
+    VKDiscardPool::discard_pool_get().discard_image_view(vk_image_view_);
     vk_image_view_ = VK_NULL_HANDLE;
   }
   vk_format_ = VK_FORMAT_UNDEFINED;
diff --git a/source/blender/gpu/vulkan/vk_immediate.cc b/source/blender/gpu/vulkan/vk_immediate.cc
index 538c3ba8cee..a895f200dab 100644
--- a/source/blender/gpu/vulkan/vk_immediate.cc
+++ b/source/blender/gpu/vulkan/vk_immediate.cc
@@ -117,7 +117,7 @@ void VKImmediate::end()
     vertex_attributes_.bind(draw.node_data.vertex_buffers);
     context.update_pipeline_data(prim_type, vertex_attributes_, draw.node_data.pipeline_data);
 
-    context.render_graph.add_node(draw);
+    context.render_graph().add_node(draw);
   }
 
   buffer_offset_ += current_subbuffer_len_;
diff --git a/source/blender/gpu/vulkan/vk_query.cc b/source/blender/gpu/vulkan/vk_query.cc
index da4f26ce818..893e2f6c66e 100644
--- a/source/blender/gpu/vulkan/vk_query.cc
+++ b/source/blender/gpu/vulkan/vk_query.cc
@@ -67,13 +67,13 @@ void VKQueryPool::begin_query()
     reset_query_pool.vk_query_pool = vk_query_pool;
     reset_query_pool.first_query = 0;
     reset_query_pool.query_count = query_chunk_len_;
-    context.render_graph.add_node(reset_query_pool);
+    context.render_graph().add_node(reset_query_pool);
   }
 
   render_graph::VKBeginQueryNode::Data begin_query = {};
   begin_query.vk_query_pool = vk_query_pool;
   begin_query.query_index = query_index_in_pool();
-  context.render_graph.add_node(begin_query);
+  context.render_graph().add_node(begin_query);
 }
 
 void VKQueryPool::end_query()
@@ -82,7 +82,7 @@ void VKQueryPool::end_query()
   render_graph::VKEndQueryNode::Data end_query = {};
   end_query.vk_query_pool = vk_query_pools_.last();
   end_query.query_index = query_index_in_pool();
-  context.render_graph.add_node(end_query);
+  context.render_graph().add_node(end_query);
 
   queries_issued_ += 1;
 }
 
@@ -93,7 +93,9 @@ void VKQueryPool::get_occlusion_result(MutableSpan<uint32_t> r_values)
    * ensure the END_RENDERING node */
   context.rendering_end();
   context.descriptor_set_get().upload_descriptor_sets();
-  context.render_graph.submit();
+  context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
+                             RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
+                             RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
 
   int queries_left = queries_issued_;
   int pool_index = 0;
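The RenderGraphFlushFlags combinations used here and in vk_texture.cc below come from vk_context.hh, which is outside this excerpt. A bitmask enum along the following lines would support these call sites; only the flag names are taken from the patch, while the values and the use of Blender's ENUM_OPERATORS helper (BLI_utildefines.h) are assumptions:

/* Illustrative sketch, not part of the patch. */
enum class RenderGraphFlushFlags {
  NONE = 0,
  SUBMIT = (1 << 0),
  WAIT_FOR_COMPLETION = (1 << 1),
  RENEW_RENDER_GRAPH = (1 << 2),
};
ENUM_OPERATORS(RenderGraphFlushFlags, RenderGraphFlushFlags::RENEW_RENDER_GRAPH)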
diff --git a/source/blender/gpu/vulkan/vk_resource_pool.cc b/source/blender/gpu/vulkan/vk_resource_pool.cc
index 9bbbe197f6c..fcb0c70c1fc 100644
--- a/source/blender/gpu/vulkan/vk_resource_pool.cc
+++ b/source/blender/gpu/vulkan/vk_resource_pool.cc
@@ -8,6 +8,7 @@
 
 #include "vk_resource_pool.hh"
 
 #include "vk_backend.hh"
+#include "vk_context.hh"
 
 namespace blender::gpu {
 
@@ -19,7 +20,6 @@ void VKResourcePool::init(VKDevice &device)
 void VKResourcePool::deinit(VKDevice &device)
 {
   immediate.deinit(device);
-  discard_pool.deinit(device);
 }
 
 void VKResourcePool::reset()
@@ -30,13 +30,19 @@ void VKResourcePool::reset()
 
 void VKDiscardPool::deinit(VKDevice &device)
 {
-  destroy_discarded_resources(device);
+  destroy_discarded_resources(device, true);
 }
 
-void VKDiscardPool::move_data(VKDiscardPool &src_pool)
+void VKDiscardPool::move_data(VKDiscardPool &src_pool, TimelineValue timeline)
 {
   std::scoped_lock mutex(mutex_);
-  std::scoped_lock mutex_src(src_pool.mutex_);
+  src_pool.buffers_.update_timeline(timeline);
+  src_pool.image_views_.update_timeline(timeline);
+  src_pool.images_.update_timeline(timeline);
+  src_pool.shader_modules_.update_timeline(timeline);
+  src_pool.pipeline_layouts_.update_timeline(timeline);
+  src_pool.framebuffers_.update_timeline(timeline);
+  src_pool.render_passes_.update_timeline(timeline);
   buffers_.extend(std::move(src_pool.buffers_));
   image_views_.extend(std::move(src_pool.image_views_));
   images_.extend(std::move(src_pool.images_));
@@ -44,120 +50,94 @@ void VKDiscardPool::move_data(VKDiscardPool &src_pool)
   pipeline_layouts_.extend(std::move(src_pool.pipeline_layouts_));
   framebuffers_.extend(std::move(src_pool.framebuffers_));
   render_passes_.extend(std::move(src_pool.render_passes_));
-
-  for (const Map<VkCommandPool, Vector<VkCommandBuffer>>::Item &item :
-       src_pool.command_buffers_.items())
-  {
-    command_buffers_.lookup_or_add_default(item.key).extend(item.value);
-  }
-  src_pool.command_buffers_.clear();
 }
 
 void VKDiscardPool::discard_image(VkImage vk_image, VmaAllocation vma_allocation)
 {
   std::scoped_lock mutex(mutex_);
-  images_.append(std::pair(vk_image, vma_allocation));
-}
-
-void VKDiscardPool::discard_command_buffer(VkCommandBuffer vk_command_buffer,
-                                           VkCommandPool vk_command_pool)
-{
-  std::scoped_lock mutex(mutex_);
-  command_buffers_.lookup_or_add_default(vk_command_pool).append(vk_command_buffer);
-}
-
-void VKDiscardPool::free_command_pool_buffers(VkCommandPool vk_command_pool, VKDevice &device)
-{
-  std::scoped_lock mutex(mutex_);
-  std::optional<Vector<VkCommandBuffer>> buffers = command_buffers_.pop_try(vk_command_pool);
-  if (!buffers) {
-    return;
-  }
-  vkFreeCommandBuffers(device.vk_handle(), vk_command_pool, (*buffers).size(), (*buffers).begin());
+  images_.append_timeline(timeline_, std::pair(vk_image, vma_allocation));
 }
 
 void VKDiscardPool::discard_image_view(VkImageView vk_image_view)
 {
   std::scoped_lock mutex(mutex_);
-  image_views_.append(vk_image_view);
+  image_views_.append_timeline(timeline_, vk_image_view);
 }
 
 void VKDiscardPool::discard_buffer(VkBuffer vk_buffer, VmaAllocation vma_allocation)
 {
   std::scoped_lock mutex(mutex_);
-  buffers_.append(std::pair(vk_buffer, vma_allocation));
+  buffers_.append_timeline(timeline_, std::pair(vk_buffer, vma_allocation));
 }
 
 void VKDiscardPool::discard_shader_module(VkShaderModule vk_shader_module)
 {
   std::scoped_lock mutex(mutex_);
-  shader_modules_.append(vk_shader_module);
+  shader_modules_.append_timeline(timeline_, vk_shader_module);
 }
 
 void VKDiscardPool::discard_pipeline_layout(VkPipelineLayout vk_pipeline_layout)
 {
   std::scoped_lock mutex(mutex_);
-  pipeline_layouts_.append(vk_pipeline_layout);
+  pipeline_layouts_.append_timeline(timeline_, vk_pipeline_layout);
 }
 
 void VKDiscardPool::discard_framebuffer(VkFramebuffer vk_framebuffer)
 {
   std::scoped_lock mutex(mutex_);
-  framebuffers_.append(vk_framebuffer);
+  framebuffers_.append_timeline(timeline_, vk_framebuffer);
 }
 
 void VKDiscardPool::discard_render_pass(VkRenderPass vk_render_pass)
 {
   std::scoped_lock mutex(mutex_);
-  render_passes_.append(vk_render_pass);
+  render_passes_.append_timeline(timeline_, vk_render_pass);
 }
 
-void VKDiscardPool::destroy_discarded_resources(VKDevice &device)
+void VKDiscardPool::destroy_discarded_resources(VKDevice &device, bool force)
 {
   std::scoped_lock mutex(mutex_);
+  TimelineValue current_timeline = force ? UINT64_MAX : device.submission_finished_timeline_get();
 
-  while (!image_views_.is_empty()) {
-    VkImageView vk_image_view = image_views_.pop_last();
+  image_views_.remove_old(current_timeline, [&](VkImageView vk_image_view) {
     vkDestroyImageView(device.vk_handle(), vk_image_view, nullptr);
-  }
+  });
 
-  while (!images_.is_empty()) {
-    std::pair<VkImage, VmaAllocation> image_allocation = images_.pop_last();
+  images_.remove_old(current_timeline, [&](std::pair<VkImage, VmaAllocation> image_allocation) {
     device.resources.remove_image(image_allocation.first);
     vmaDestroyImage(device.mem_allocator_get(), image_allocation.first, image_allocation.second);
-  }
-
-  while (!buffers_.is_empty()) {
-    std::pair<VkBuffer, VmaAllocation> buffer_allocation = buffers_.pop_last();
+  });
+  buffers_.remove_old(current_timeline, [&](std::pair<VkBuffer, VmaAllocation> buffer_allocation) {
     device.resources.remove_buffer(buffer_allocation.first);
     vmaDestroyBuffer(
         device.mem_allocator_get(), buffer_allocation.first, buffer_allocation.second);
-  }
+  });
 
-  while (!pipeline_layouts_.is_empty()) {
-    VkPipelineLayout vk_pipeline_layout = pipeline_layouts_.pop_last();
+  pipeline_layouts_.remove_old(current_timeline, [&](VkPipelineLayout vk_pipeline_layout) {
     vkDestroyPipelineLayout(device.vk_handle(), vk_pipeline_layout, nullptr);
-  }
+  });
 
-  while (!shader_modules_.is_empty()) {
-    VkShaderModule vk_shader_module = shader_modules_.pop_last();
+  shader_modules_.remove_old(current_timeline, [&](VkShaderModule vk_shader_module) {
     vkDestroyShaderModule(device.vk_handle(), vk_shader_module, nullptr);
-  }
+  });
 
-  while (!framebuffers_.is_empty()) {
-    VkFramebuffer vk_framebuffer = framebuffers_.pop_last();
+  framebuffers_.remove_old(current_timeline, [&](VkFramebuffer vk_framebuffer) {
     vkDestroyFramebuffer(device.vk_handle(), vk_framebuffer, nullptr);
-  }
+  });
 
-  while (!render_passes_.is_empty()) {
-    VkRenderPass vk_render_pass = render_passes_.pop_last();
+  render_passes_.remove_old(current_timeline, [&](VkRenderPass vk_render_pass) {
     vkDestroyRenderPass(device.vk_handle(), vk_render_pass, nullptr);
+  });
+}
+
+VKDiscardPool &VKDiscardPool::discard_pool_get()
+{
+  VKContext *context = VKContext::get();
+  if (context != nullptr) {
+    return context->discard_pool;
   }
-  for (const Map<VkCommandPool, Vector<VkCommandBuffer>>::Item &item : command_buffers_.items()) {
-    vkFreeCommandBuffers(device.vk_handle(), item.key, item.value.size(), item.value.begin());
-  }
-  command_buffers_.clear();
+  VKDevice &device = VKBackend::get().device;
+  return device.orphaned_data;
 }
 
 }  // namespace blender::gpu
diff --git a/source/blender/gpu/vulkan/vk_resource_pool.hh b/source/blender/gpu/vulkan/vk_resource_pool.hh
index 497bf88b373..6aa276be755 100644
--- a/source/blender/gpu/vulkan/vk_resource_pool.hh
+++ b/source/blender/gpu/vulkan/vk_resource_pool.hh
@@ -15,6 +15,54 @@ namespace blender::gpu {
 
 class VKDevice;
+class VKDiscardPool;
+
+template<typename Item> class TimelineResources : Vector<std::pair<TimelineValue, Item>> {
+  friend class VKDiscardPool;
+
+ public:
+  void append_timeline(TimelineValue timeline, Item item)
+  {
+    BLI_assert_msg(this->is_empty() || this->last().first <= timeline,
+                   "Timeline must be added in order");
+    this->append(std::pair(timeline, item));
+  }
+
+  void update_timeline(TimelineValue timeline)
+  {
+    for (std::pair<TimelineValue, Item> &pair : *this) {
+      pair.first = timeline;
+    }
+  }
+
+  int64_t size() const
+  {
+    return static_cast<const Vector<std::pair<TimelineValue, Item>> &>(*this).size();
+  }
+  bool is_empty() const
+  {
+    return static_cast<const Vector<std::pair<TimelineValue, Item>> &>(*this).is_empty();
+  }
+
+  /**
+   * Remove all items whose timeline is before or equal to `current_timeline`.
+   */
+  template<typename Deleter> void remove_old(TimelineValue current_timeline, Deleter deleter)
+  {
+    int64_t first_index_to_keep = 0;
+    for (std::pair<TimelineValue, Item> &item : *this) {
+      if (item.first > current_timeline) {
+        break;
+      }
+      deleter(item.second);
+      first_index_to_keep++;
+    }
+
+    if (first_index_to_keep > 0) {
+      this->remove(0, first_index_to_keep);
+    }
+  }
+};
 
 /**
  * Pool of resources that are discarded, but can still be in used and cannot be destroyed.
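The intended use of TimelineResources is to tag every discarded resource with the timeline value of the last submission that may still reference it, and to destroy it only once the device reports that this value has completed. A minimal usage sketch (illustrative only; `device`, `view_a` and `view_b` are placeholders, not code from the patch):

/* Illustrative sketch, not part of the patch. */
TimelineResources<VkImageView> views;
views.append_timeline(5, view_a); /* Discarded while submission 5 was still in flight. */
views.append_timeline(7, view_b);
TimelineValue finished = device.submission_finished_timeline_get(); /* Say this returns 6. */
views.remove_old(finished, [&](VkImageView view) {
  vkDestroyImageView(device.vk_handle(), view, nullptr); /* Destroys only view_a. */
});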
@@ -30,31 +78,22 @@ class VKDiscardPool {
   friend class VKDevice;
 
  private:
-  Vector<std::pair<VkImage, VmaAllocation>> images_;
-  Vector<std::pair<VkBuffer, VmaAllocation>> buffers_;
-  Vector<VkImageView> image_views_;
-  Vector<VkShaderModule> shader_modules_;
-  Vector<VkPipelineLayout> pipeline_layouts_;
-  Vector<VkRenderPass> render_passes_;
-  Vector<VkFramebuffer> framebuffers_;
-  Map<VkCommandPool, Vector<VkCommandBuffer>> command_buffers_;
+  TimelineResources<std::pair<VkImage, VmaAllocation>> images_;
+  TimelineResources<std::pair<VkBuffer, VmaAllocation>> buffers_;
+  TimelineResources<VkImageView> image_views_;
+  TimelineResources<VkShaderModule> shader_modules_;
+  TimelineResources<VkPipelineLayout> pipeline_layouts_;
+  TimelineResources<VkRenderPass> render_passes_;
+  TimelineResources<VkFramebuffer> framebuffers_;
   std::mutex mutex_;
-  /**
-   * Free command buffers generated from `vk_command_pool`.
-   *
-   * Command buffers are freed in `destroy_discarded_resources`, however if a `vk_command_pool` is
-   * going to be destroyed, commands buffers generated from this command pool needs to be freed at
-   * forehand.
-   */
-  void free_command_pool_buffers(VkCommandPool vk_command_pool, VKDevice &device);
+  TimelineValue timeline_ = UINT64_MAX;
 
  public:
   void deinit(VKDevice &device);
 
   void discard_image(VkImage vk_image, VmaAllocation vma_allocation);
-  void discard_command_buffer(VkCommandBuffer vk_command_buffer, VkCommandPool vk_command_pool);
   void discard_image_view(VkImageView vk_image_view);
   void discard_buffer(VkBuffer vk_buffer, VmaAllocation vma_allocation);
   void discard_shader_module(VkShaderModule vk_shader_module);
@@ -68,9 +107,19 @@ class VKDiscardPool {
    * GPU resources that are discarded from the dependency graph are stored in the device orphaned
    * data. When a swap chain context list is made active the orphaned data can be merged into a
    * swap chain discard pool.
+   *
+   * All moved items will receive a new timeline.
    */
-  void move_data(VKDiscardPool &src_pool);
-  void destroy_discarded_resources(VKDevice &device);
+  void move_data(VKDiscardPool &src_pool, TimelineValue timeline);
+  void destroy_discarded_resources(VKDevice &device, bool force = false);
+
+  /**
+   * Return the discard pool for the current thread.
+   *
+   * When the active thread has a context, the context discard pool is used.
+   * Otherwise the device discard pool is used.
+   */
+  static VKDiscardPool &discard_pool_get();
 };
 
 class VKResourcePool {
@@ -78,7 +127,6 @@ class VKResourcePool {
  public:
   VKDescriptorPools descriptor_pools;
   VKDescriptorSetTracker descriptor_set;
-  VKDiscardPool discard_pool;
   VKImmediate immediate;
 
   void init(VKDevice &device);
diff --git a/source/blender/gpu/vulkan/vk_resource_tracker.cc b/source/blender/gpu/vulkan/vk_resource_tracker.cc
index 0d3eacd4509..b1058eeb1c6 100644
--- a/source/blender/gpu/vulkan/vk_resource_tracker.cc
+++ b/source/blender/gpu/vulkan/vk_resource_tracker.cc
@@ -12,7 +12,7 @@ namespace blender::gpu {
 bool VKSubmissionTracker::is_changed(const VKContext &context)
 {
-  const VKSubmissionID &current_id = context.render_graph.submission_id;
+  const VKSubmissionID &current_id = context.render_graph().submission_id;
   if (last_known_id_ != current_id) {
     last_known_id_ = current_id;
     return true;
diff --git a/source/blender/gpu/vulkan/vk_shader.cc b/source/blender/gpu/vulkan/vk_shader.cc
index a5194ce83e0..6e6fc038893 100644
--- a/source/blender/gpu/vulkan/vk_shader.cc
+++ b/source/blender/gpu/vulkan/vk_shader.cc
@@ -519,8 +519,7 @@ void VKShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat
 
 VKShader::~VKShader()
 {
-  VKDevice &device = VKBackend::get().device;
-  VKDiscardPool &discard_pool = device.discard_pool_for_current_thread();
+  VKDiscardPool &discard_pool = VKDiscardPool::discard_pool_get();
 
   if (vk_pipeline_layout != VK_NULL_HANDLE) {
     discard_pool.discard_pipeline_layout(vk_pipeline_layout);
diff --git a/source/blender/gpu/vulkan/vk_shader_module.cc b/source/blender/gpu/vulkan/vk_shader_module.cc
index 9568c54820f..110f3242a06 100644
--- a/source/blender/gpu/vulkan/vk_shader_module.cc
+++ b/source/blender/gpu/vulkan/vk_shader_module.cc
@@ -16,8 +16,7 @@ namespace blender::gpu {
 
 VKShaderModule::~VKShaderModule()
 {
-  VKDevice &device = VKBackend::get().device;
-  VKDiscardPool &discard_pool = device.discard_pool_for_current_thread();
+  VKDiscardPool &discard_pool = VKDiscardPool::discard_pool_get();
   if (vk_shader_module != VK_NULL_HANDLE) {
     discard_pool.discard_shader_module(vk_shader_module);
     vk_shader_module = VK_NULL_HANDLE;
diff --git a/source/blender/gpu/vulkan/vk_staging_buffer.cc b/source/blender/gpu/vulkan/vk_staging_buffer.cc
index 82aa874dacb..76caba4a073 100644
--- a/source/blender/gpu/vulkan/vk_staging_buffer.cc
+++ b/source/blender/gpu/vulkan/vk_staging_buffer.cc
@@ -40,7 +40,7 @@ void VKStagingBuffer::copy_to_device(VKContext &context)
   copy_buffer.dst_buffer = device_buffer_.vk_handle();
   copy_buffer.region.size = device_buffer_.size_in_bytes();
 
-  context.render_graph.add_node(copy_buffer);
+  context.render_graph().add_node(copy_buffer);
 }
 
 void VKStagingBuffer::copy_from_device(VKContext &context)
@@ -51,7 +51,7 @@ void VKStagingBuffer::copy_from_device(VKContext &context)
   copy_buffer.dst_buffer = host_buffer_.vk_handle();
   copy_buffer.region.size = device_buffer_.size_in_bytes();
 
-  context.render_graph.add_node(copy_buffer);
+  context.render_graph().add_node(copy_buffer);
 }
 
 void VKStagingBuffer::free()
diff --git a/source/blender/gpu/vulkan/vk_storage_buffer.cc b/source/blender/gpu/vulkan/vk_storage_buffer.cc
index 49ce423cb00..578aeb5a70f 100644
--- a/source/blender/gpu/vulkan/vk_storage_buffer.cc
+++ b/source/blender/gpu/vulkan/vk_storage_buffer.cc
@@ -88,7 +88,7 @@ void VKStorageBuffer::copy_sub(VertBuf *src, uint dst_offset, uint src_offset, u
   copy_buffer.region.size = copy_size;
 
   VKContext &context = *VKContext::get();
-  context.render_graph.add_node(copy_buffer);
+  context.render_graph().add_node(copy_buffer);
 }
 
 void VKStorageBuffer::async_flush_to_host()
diff --git a/source/blender/gpu/vulkan/vk_texture.cc b/source/blender/gpu/vulkan/vk_texture.cc
index dd2fb85a511..68f256be90f 100644
--- a/source/blender/gpu/vulkan/vk_texture.cc
+++ b/source/blender/gpu/vulkan/vk_texture.cc
@@ -41,8 +41,7 @@ static VkImageAspectFlags to_vk_image_aspect_single_bit(const VkImageAspectFlags
 VKTexture::~VKTexture()
 {
   if (vk_image_ != VK_NULL_HANDLE && allocation_ != VK_NULL_HANDLE) {
-    VKDevice &device = VKBackend::get().device;
-    device.discard_pool_for_current_thread().discard_image(vk_image_, allocation_);
+    VKDiscardPool::discard_pool_get().discard_image(vk_image_, allocation_);
     vk_image_ = VK_NULL_HANDLE;
     allocation_ = VK_NULL_HANDLE;
   }
@@ -83,7 +82,7 @@ void VKTexture::generate_mipmap()
   update_mipmaps.vk_image_aspect = to_vk_image_aspect_flag_bits(device_format_);
   update_mipmaps.mipmaps = mipmaps_;
   update_mipmaps.layer_count = vk_layer_count(1);
-  context.render_graph.add_node(update_mipmaps);
+  context.render_graph().add_node(update_mipmaps);
 }
 
 void VKTexture::copy_to(VKTexture &dst_texture, VkImageAspectFlags vk_image_aspect)
@@ -101,7 +100,7 @@ void VKTexture::copy_to(VKTexture &dst_texture, VkImageAspectFlags vk_image_aspe
   copy_image.vk_image_aspect = to_vk_image_aspect_flag_bits(device_format_get());
 
   VKContext &context = *VKContext::get();
-  context.render_graph.add_node(copy_image);
+  context.render_graph().add_node(copy_image);
 }
 
 void VKTexture::copy_to(Texture *tex)
@@ -142,7 +141,7 @@ void VKTexture::clear(eGPUDataFormat format, const void *data)
 
   VKContext &context = *VKContext::get();
 
-  context.render_graph.add_node(clear_color_image);
+  context.render_graph().add_node(clear_color_image);
 }
 
 void VKTexture::clear_depth_stencil(const eGPUFrameBufferBits buffers,
@@ -171,7 +170,7 @@ void VKTexture::clear_depth_stencil(const eGPUFrameBufferBits buffers,
                                                            VK_REMAINING_MIP_LEVELS;
 
   VKContext &context = *VKContext::get();
-  context.render_graph.add_node(clear_depth_stencil_image);
+  context.render_graph().add_node(clear_depth_stencil_image);
 }
 
 void VKTexture::swizzle_set(const char swizzle_mask[4])
@@ -220,9 +219,12 @@ void VKTexture::read_sub(
 
   VKContext &context = *VKContext::get();
   context.rendering_end();
-  context.render_graph.add_node(copy_image_to_buffer);
+  context.render_graph().add_node(copy_image_to_buffer);
   context.descriptor_set_get().upload_descriptor_sets();
-  context.render_graph.submit_for_read();
+
+  context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
+                             RenderGraphFlushFlags::RENEW_RENDER_GRAPH |
+                             RenderGraphFlushFlags::WAIT_FOR_COMPLETION);
 
   convert_device_to_host(
       r_data, staging_buffer.mapped_memory_get(), sample_len, format, format_, device_format_);
@@ -363,7 +365,7 @@ void VKTexture::update_sub(int mip,
   node_data.region.imageSubresource.baseArrayLayer = start_layer;
   node_data.region.imageSubresource.layerCount = layers;
 
-  context.render_graph.add_node(copy_buffer_to_image);
+  context.render_graph().add_node(copy_buffer_to_image);
 }
 
 void VKTexture::update_sub(