Vulkan: Device command builder

This PR implements a new threading model for building render graphs,
based on tests performed last month. For our workload, multithreaded
command building will block in the driver or device, so it is better to
use a single thread for command building.

Details of the internal workings are documented at https://developer.blender.org/docs/features/gpu/vulkan/render_graph/

- When a context is activated on a thread the context asks for a
  render graph it can use by calling `VKDevice::render_graph_new`.
- Parts of the GPU backend that require GPU commands will add a
  specific render graph node to the render graph. Each node also
  contains a reference to all resources it needs, including the
  access it needs and the image layout.
- When the context is flushed the render graph is submitted to the
  device by calling `VKDevice::render_graph_submit`.
- The device puts the render graph in `VKDevice::submission_pool`.
- There is a single background thread that gets the next render
  graph to send to the GPU (`VKDevice::submission_runner`).
  - Reorder the commands of the render graph to comply with Vulkan
    specific command order rules and reduce possible bottlenecks.
    (`VKScheduler`)
  - Generate the required barriers `VKCommandBuilder::groups_extract_barriers`.
    This is a separate step to reduce resource locking, giving other
    threads access to the resource states while they are building
    the render graph nodes.
  - GPU commands and pipeline barriers are recorded to a VkCommandBuffer.
    (`VKCommandBuilder::record_commands`)
  - When completed the command buffer can be submitted to the device
    queue. `vkQueueSubmit`
  - Render graphs that have been submitted can be reused by the next
    thread. This is done by pushing the render graph to the
    `VKDevice::unused_render_graphs` queue.

Pull Request: https://projects.blender.org/blender/blender/pulls/132681
This commit is contained in:
Jeroen Bakker
2025-01-27 08:55:23 +01:00
parent b34c135c30
commit e6b3cc8983
36 changed files with 667 additions and 509 deletions

View File

@@ -28,7 +28,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_read_back)
dispatch_info.dispatch_node.group_count_y = 1;
dispatch_info.dispatch_node.group_count_z = 1;
render_graph->add_node(dispatch_info);
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(3, log.size());
EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)",
log[0]);
@@ -75,7 +75,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_dispatch_read_back)
dispatch_info.dispatch_node.group_count_z = 2;
render_graph->add_node(dispatch_info);
}
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(5, log.size());
EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)",
log[0]);
@@ -134,7 +134,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_dispatch_read_back_with_changing_descr
dispatch_info.dispatch_node.group_count_z = 2;
render_graph->add_node(dispatch_info);
}
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(6, log.size());
EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)",
log[0]);
@@ -196,7 +196,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_dispatch_read_back_with_changing_pipel
dispatch_info.dispatch_node.group_count_z = 2;
render_graph->add_node(dispatch_info);
}
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(6, log.size());
EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)",
log[0]);
@@ -259,7 +259,7 @@ TEST_F(VKRenderGraphTestCompute,
dispatch_info.dispatch_node.group_count_z = 2;
render_graph->add_node(dispatch_info);
}
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(7, log.size());
EXPECT_EQ("bind_pipeline(pipeline_bind_point=VK_PIPELINE_BIND_POINT_COMPUTE, pipeline=0x2)",
log[0]);
@@ -309,7 +309,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_indirect_read_back)
dispatch_indirect_info.dispatch_indirect_node.buffer = command_buffer;
dispatch_indirect_info.dispatch_indirect_node.offset = 0;
render_graph->add_node(dispatch_indirect_info);
render_graph->submit_for_read();
submit(render_graph, this->command_buffer);
EXPECT_EQ(4, log.size());
EXPECT_EQ(
"pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, "
@@ -364,7 +364,7 @@ TEST_F(VKRenderGraphTestCompute, dispatch_indirect_dispatch_indirect_read_back)
dispatch_indirect_info.dispatch_indirect_node.offset = 12;
render_graph->add_node(dispatch_indirect_info);
}
render_graph->submit_for_read();
submit(render_graph, this->command_buffer);
EXPECT_EQ(6, log.size());
EXPECT_EQ(

View File

@@ -15,8 +15,15 @@ TEST_F(VKRenderGraphTestPresent, transfer_and_present)
VkHandle<VkImage> back_buffer(1u);
resources.add_image(back_buffer, 1);
{
render_graph::VKSynchronizationNode::CreateInfo synchronization = {};
synchronization.vk_image = back_buffer;
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
render_graph->add_node(synchronization);
}
render_graph->submit_for_present(back_buffer);
submit(render_graph, command_buffer);
EXPECT_EQ(1, log.size());
EXPECT_EQ(
@@ -43,7 +50,15 @@ TEST_F(VKRenderGraphTestPresent, clear_and_present)
clear_color_image.vk_image = back_buffer;
render_graph->add_node(clear_color_image);
render_graph->submit_for_present(back_buffer);
{
render_graph::VKSynchronizationNode::CreateInfo synchronization = {};
synchronization.vk_image = back_buffer;
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
render_graph->add_node(synchronization);
}
submit(render_graph, command_buffer);
EXPECT_EQ(3, log.size());

View File

@@ -69,7 +69,7 @@ TEST_P(VKRenderGraphTestRender, begin_clear_attachments_end_read_back)
render_graph->add_node(copy_image_to_buffer);
}
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(6, log.size());
EXPECT_EQ(
@@ -185,7 +185,7 @@ TEST_P(VKRenderGraphTestRender, begin_draw_end)
render_graph->add_node(end_rendering);
}
render_graph->submit();
submit(render_graph, command_buffer);
EXPECT_EQ(5, log.size());
EXPECT_EQ(
"pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, "
@@ -267,7 +267,7 @@ TEST_P(VKRenderGraphTestRender, begin_draw_end__layered)
render_graph->add_node(end_rendering);
}
render_graph->submit();
submit(render_graph, command_buffer);
EXPECT_EQ(7, log.size());
EXPECT_EQ(
"pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, "

View File

@@ -54,7 +54,15 @@ TEST_P(VKRenderGraphTestScheduler, begin_rendering_copy_buffer_end_rendering)
render_graph->add_node(end_rendering);
}
render_graph->submit_for_present(image);
{
render_graph::VKSynchronizationNode::CreateInfo synchronization = {};
synchronization.vk_image = image;
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
render_graph->add_node(synchronization);
}
submit(render_graph, command_buffer);
EXPECT_EQ(6, log.size());
EXPECT_EQ(
@@ -169,7 +177,15 @@ TEST_P(VKRenderGraphTestScheduler, begin_clear_attachments_copy_buffer_end)
render_graph->add_node(end_rendering);
}
render_graph->submit_for_present(image);
{
render_graph::VKSynchronizationNode::CreateInfo synchronization = {};
synchronization.vk_image = image;
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
render_graph->add_node(synchronization);
}
submit(render_graph, command_buffer);
EXPECT_EQ(7, log.size());
EXPECT_EQ(
"pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, "
@@ -290,7 +306,15 @@ TEST_P(VKRenderGraphTestScheduler, begin_copy_buffer_clear_attachments_end)
render_graph->add_node(end_rendering);
}
render_graph->submit_for_present(image);
{
render_graph::VKSynchronizationNode::CreateInfo synchronization = {};
synchronization.vk_image = image;
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
render_graph->add_node(synchronization);
}
submit(render_graph, command_buffer);
EXPECT_EQ(7, log.size());
EXPECT_EQ(
"pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, "
@@ -427,7 +451,15 @@ TEST_P(VKRenderGraphTestScheduler, begin_clear_attachments_copy_buffer_clear_att
render_graph->add_node(end_rendering);
}
render_graph->submit_for_present(image);
{
render_graph::VKSynchronizationNode::CreateInfo synchronization = {};
synchronization.vk_image = image;
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
render_graph->add_node(synchronization);
}
submit(render_graph, command_buffer);
ASSERT_EQ(8, log.size());
EXPECT_EQ(
@@ -585,7 +617,7 @@ TEST_P(VKRenderGraphTestScheduler, begin_draw_copy_framebuffer_draw_end)
render_graph->add_node(end_rendering);
}
render_graph->submit();
submit(render_graph, command_buffer);
ASSERT_EQ(12, log.size());
EXPECT_EQ(
@@ -800,7 +832,7 @@ TEST_P(VKRenderGraphTestScheduler, begin_update_draw_update_draw_update_draw_end
render_graph->add_node(end_rendering);
}
render_graph->submit();
submit(render_graph, command_buffer);
ASSERT_EQ(17, log.size());
EXPECT_EQ("update_buffer(dst_buffer=0x1, dst_offset=0, data_size=16)", log[0]);
EXPECT_EQ("update_buffer(dst_buffer=0x2, dst_offset=0, data_size=24)", log[1]);
@@ -997,7 +1029,7 @@ TEST_P(VKRenderGraphTestScheduler, begin_draw_copy_to_attachment_draw_end)
render_graph->add_node(end_rendering);
}
render_graph->submit();
submit(render_graph, command_buffer);
ASSERT_EQ(11, log.size());
EXPECT_EQ(
"pipeline_barrier(src_stage_mask=VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, "

View File

@@ -20,7 +20,7 @@ TEST_F(VKRenderGraphTestTransfer, fill_and_read_back)
resources.add_buffer(buffer);
VKFillBufferNode::CreateInfo fill_buffer = {buffer, 1024, 42};
render_graph->add_node(fill_buffer);
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(1, log.size());
EXPECT_EQ("fill_buffer(dst_buffer=0x1, dst_offset=0, size=1024, data=42)", log[0]);
@@ -47,7 +47,7 @@ TEST_F(VKRenderGraphTestTransfer, fill_transfer_and_read_back)
copy_buffer.region.size = 1024;
render_graph->add_node(copy_buffer);
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(3, log.size());
EXPECT_EQ("fill_buffer(dst_buffer=0x1, dst_offset=0, size=1024, data=42)", log[0]);
@@ -79,7 +79,7 @@ TEST_F(VKRenderGraphTestTransfer, fill_fill_read_back)
render_graph->add_node(fill_buffer_1);
VKFillBufferNode::CreateInfo fill_buffer_2 = {buffer, 1024, 42};
render_graph->add_node(fill_buffer_2);
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(3, log.size());
EXPECT_EQ("fill_buffer(dst_buffer=0x1, dst_offset=0, size=1024, data=0)", log[0]);
@@ -142,7 +142,7 @@ TEST_F(VKRenderGraphTestTransfer, clear_clear_copy_and_read_back)
render_graph->add_node(clear_color_image_dst);
render_graph->add_node(copy_image);
render_graph->add_node(copy_dst_image_to_buffer);
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(8, log.size());
EXPECT_EQ(
@@ -268,7 +268,7 @@ TEST_F(VKRenderGraphTestTransfer, clear_blit_copy_and_read_back)
VKBlitImageNode::CreateInfo blit_image = {src_image, dst_image, vk_image_blit, VK_FILTER_LINEAR};
render_graph->add_node(blit_image);
render_graph->add_node(copy_dst_image_to_buffer);
render_graph->submit_for_read();
submit(render_graph, command_buffer);
EXPECT_EQ(6, log.size());
EXPECT_EQ(

View File

@@ -52,19 +52,6 @@ class CommandBufferLog : public VKCommandBufferInterface {
is_recording_ = false;
}
void submit_with_cpu_synchronization(VkFence /*vk_fence*/) override
{
EXPECT_FALSE(is_recording_);
EXPECT_FALSE(is_cpu_synchronizing_);
is_cpu_synchronizing_ = true;
};
void wait_for_cpu_synchronization(VkFence /*vk_fence*/) override
{
EXPECT_FALSE(is_recording_);
EXPECT_TRUE(is_cpu_synchronizing_);
is_cpu_synchronizing_ = false;
};
void bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) override
{
EXPECT_TRUE(is_recording_);
@@ -481,16 +468,16 @@ class VKRenderGraphTest : public ::testing::Test {
{
resources.use_dynamic_rendering = use_dynamic_rendering;
resources.use_dynamic_rendering_local_read = use_dynamic_rendering_local_read;
render_graph = std::make_unique<VKRenderGraph>(
std::make_unique<CommandBufferLog>(
log, use_dynamic_rendering, use_dynamic_rendering_local_read),
resources);
render_graph = std::make_unique<VKRenderGraph>(resources);
command_buffer = std::make_unique<CommandBufferLog>(
log, use_dynamic_rendering, use_dynamic_rendering_local_read);
}
protected:
Vector<std::string> log;
VKResourceStateTracker resources;
std::unique_ptr<VKRenderGraph> render_graph;
std::unique_ptr<CommandBufferLog> command_buffer;
bool use_dynamic_rendering = true;
bool use_dynamic_rendering_local_read = true;
};
@@ -503,10 +490,9 @@ class VKRenderGraphTest_P : public ::testing::TestWithParam<std::tuple<bool, boo
use_dynamic_rendering_local_read = std::get<1>(GetParam());
resources.use_dynamic_rendering = use_dynamic_rendering;
resources.use_dynamic_rendering_local_read = use_dynamic_rendering_local_read;
render_graph = std::make_unique<VKRenderGraph>(
std::make_unique<CommandBufferLog>(
log, use_dynamic_rendering, use_dynamic_rendering_local_read),
resources);
render_graph = std::make_unique<VKRenderGraph>(resources);
command_buffer = std::make_unique<CommandBufferLog>(
log, use_dynamic_rendering, use_dynamic_rendering_local_read);
}
protected:
@@ -524,6 +510,7 @@ class VKRenderGraphTest_P : public ::testing::TestWithParam<std::tuple<bool, boo
Vector<std::string> log;
VKResourceStateTracker resources;
std::unique_ptr<VKRenderGraph> render_graph;
std::unique_ptr<CommandBufferLog> command_buffer;
bool use_dynamic_rendering = true;
bool use_dynamic_rendering_local_read = true;
};
@@ -546,4 +533,18 @@ template<typename VKObjectType> union VkHandle {
}
};
/**
 * Test helper that performs a full submit cycle on the given render graph using the
 * `CommandBufferLog` stand-in for a real command buffer.
 *
 * Mirrors the steps the device submission runner performs in production:
 * scheduling (`VKScheduler::select_nodes`), barrier/group building
 * (`VKCommandBuilder::build_nodes`), then command recording between
 * `begin_recording`/`end_recording`, and finally resetting the render graph so it
 * can be reused by a subsequent test step.
 */
static inline void submit(std::unique_ptr<VKRenderGraph> &render_graph,
std::unique_ptr<CommandBufferLog> &command_buffer)
{
/* Local scheduler/builder instances: tests don't go through VKDevice. */
VKScheduler scheduler;
VKCommandBuilder command_builder;
/* Select and order the nodes to execute. */
Span<render_graph::NodeHandle> node_handles = scheduler.select_nodes(*render_graph);
/* Build execution groups and barriers; must happen before recording. */
command_builder.build_nodes(*render_graph, *command_buffer, node_handles);
command_buffer->begin_recording();
/* Record the commands of the (already built) nodes into the command buffer log. */
command_builder.record_commands(*render_graph, *command_buffer, node_handles);
command_buffer->end_recording();
/* Clear the graph so the next test step starts from an empty node list. */
render_graph->reset();
}
} // namespace blender::gpu::render_graph

View File

@@ -11,72 +11,22 @@
#include "vk_device.hh"
namespace blender::gpu::render_graph {
VKCommandBufferWrapper::VKCommandBufferWrapper(const VKWorkarounds &workarounds)
VKCommandBufferWrapper::VKCommandBufferWrapper(VkCommandBuffer vk_command_buffer,
const VKWorkarounds &workarounds)
: vk_command_buffer_(vk_command_buffer)
{
vk_command_pool_create_info_ = {};
vk_command_pool_create_info_.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
vk_command_pool_create_info_.queueFamilyIndex = 0;
vk_command_buffer_allocate_info_ = {};
vk_command_buffer_allocate_info_.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
vk_command_buffer_allocate_info_.commandPool = VK_NULL_HANDLE;
vk_command_buffer_allocate_info_.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
vk_command_buffer_allocate_info_.commandBufferCount = 1;
vk_command_buffer_begin_info_ = {};
vk_command_buffer_begin_info_.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
vk_command_buffer_begin_info_.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vk_fence_create_info_ = {};
vk_fence_create_info_.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
vk_fence_create_info_.flags = VK_FENCE_CREATE_SIGNALED_BIT;
vk_submit_info_ = {};
vk_submit_info_.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
vk_submit_info_.waitSemaphoreCount = 0;
vk_submit_info_.pWaitSemaphores = nullptr;
vk_submit_info_.pWaitDstStageMask = nullptr;
vk_submit_info_.commandBufferCount = 1;
vk_submit_info_.pCommandBuffers = &vk_command_buffer_;
vk_submit_info_.signalSemaphoreCount = 0;
vk_submit_info_.pSignalSemaphores = nullptr;
use_dynamic_rendering = !workarounds.dynamic_rendering;
use_dynamic_rendering_local_read = !workarounds.dynamic_rendering_local_read;
}
VKCommandBufferWrapper::~VKCommandBufferWrapper()
{
VKDevice &device = VKBackend::get().device;
device.free_command_pool_buffers(vk_command_pool_);
if (vk_command_pool_ != VK_NULL_HANDLE) {
vkDestroyCommandPool(device.vk_handle(), vk_command_pool_, nullptr);
vk_command_pool_ = VK_NULL_HANDLE;
}
if (vk_fence_ != VK_NULL_HANDLE) {
vkDestroyFence(device.vk_handle(), vk_fence_, nullptr);
vk_fence_ = VK_NULL_HANDLE;
}
}
void VKCommandBufferWrapper::begin_recording()
{
VKDevice &device = VKBackend::get().device;
if (vk_command_pool_ == VK_NULL_HANDLE) {
vk_command_pool_create_info_.queueFamilyIndex = device.queue_family_get();
vkCreateCommandPool(
device.vk_handle(), &vk_command_pool_create_info_, nullptr, &vk_command_pool_);
vk_command_buffer_allocate_info_.commandPool = vk_command_pool_;
vk_command_pool_create_info_.queueFamilyIndex = 0;
}
if (vk_fence_ == VK_NULL_HANDLE) {
vkCreateFence(device.vk_handle(), &vk_fence_create_info_, nullptr, &vk_fence_);
}
BLI_assert(vk_command_buffer_ == VK_NULL_HANDLE);
vkAllocateCommandBuffers(
device.vk_handle(), &vk_command_buffer_allocate_info_, &vk_command_buffer_);
vkBeginCommandBuffer(vk_command_buffer_, &vk_command_buffer_begin_info_);
VkCommandBufferBeginInfo vk_command_buffer_begin_info = {
VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
nullptr,
VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
nullptr};
vkBeginCommandBuffer(vk_command_buffer_, &vk_command_buffer_begin_info);
}
void VKCommandBufferWrapper::end_recording()
@@ -84,32 +34,6 @@ void VKCommandBufferWrapper::end_recording()
vkEndCommandBuffer(vk_command_buffer_);
}
void VKCommandBufferWrapper::submit_with_cpu_synchronization(VkFence vk_fence)
{
if (vk_fence == VK_NULL_HANDLE) {
vk_fence = vk_fence_;
}
VKDevice &device = VKBackend::get().device;
vkResetFences(device.vk_handle(), 1, &vk_fence);
{
std::scoped_lock lock(device.queue_mutex_get());
vkQueueSubmit(device.queue_get(), 1, &vk_submit_info_, vk_fence);
}
device.discard_pool_for_current_thread(true).discard_command_buffer(vk_command_buffer_,
vk_command_pool_);
vk_command_buffer_ = nullptr;
}
void VKCommandBufferWrapper::wait_for_cpu_synchronization(VkFence vk_fence)
{
if (vk_fence == VK_NULL_HANDLE) {
vk_fence = vk_fence_;
}
VKDevice &device = VKBackend::get().device;
while (vkWaitForFences(device.vk_handle(), 1, &vk_fence, true, UINT64_MAX) == VK_TIMEOUT) {
}
}
void VKCommandBufferWrapper::bind_pipeline(VkPipelineBindPoint pipeline_bind_point,
VkPipeline pipeline)
{

View File

@@ -25,8 +25,6 @@ class VKCommandBufferInterface {
virtual void begin_recording() = 0;
virtual void end_recording() = 0;
virtual void submit_with_cpu_synchronization(VkFence vk_fence = VK_NULL_HANDLE) = 0;
virtual void wait_for_cpu_synchronization(VkFence vk_fence = VK_NULL_HANDLE) = 0;
virtual void bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) = 0;
virtual void bind_descriptor_sets(VkPipelineBindPoint pipeline_bind_point,
@@ -145,24 +143,13 @@ class VKCommandBufferInterface {
class VKCommandBufferWrapper : public VKCommandBufferInterface {
private:
VkCommandPoolCreateInfo vk_command_pool_create_info_;
VkCommandBufferAllocateInfo vk_command_buffer_allocate_info_;
VkCommandBufferBeginInfo vk_command_buffer_begin_info_;
VkFenceCreateInfo vk_fence_create_info_;
VkSubmitInfo vk_submit_info_;
VkCommandPool vk_command_pool_ = VK_NULL_HANDLE;
VkCommandBuffer vk_command_buffer_ = VK_NULL_HANDLE;
VkFence vk_fence_ = VK_NULL_HANDLE;
public:
VKCommandBufferWrapper(const VKWorkarounds &workarounds);
virtual ~VKCommandBufferWrapper();
VKCommandBufferWrapper(VkCommandBuffer vk_command_buffer, const VKWorkarounds &workarounds);
void begin_recording() override;
void end_recording() override;
void submit_with_cpu_synchronization(VkFence vk_fence) override;
void wait_for_cpu_synchronization(VkFence vk_fence) override;
void bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) override;
void bind_descriptor_sets(VkPipelineBindPoint pipeline_bind_point,

View File

@@ -26,10 +26,13 @@ void VKCommandBuilder::build_nodes(VKRenderGraph &render_graph,
groups_init(render_graph, node_handles);
groups_extract_barriers(
render_graph, node_handles, command_buffer.use_dynamic_rendering_local_read);
}
command_buffer.begin_recording();
void VKCommandBuilder::record_commands(VKRenderGraph &render_graph,
VKCommandBufferInterface &command_buffer,
Span<NodeHandle> node_handles)
{
groups_build_commands(render_graph, command_buffer, node_handles);
command_buffer.end_recording();
}
void VKCommandBuilder::groups_init(const VKRenderGraph &render_graph,
@@ -71,8 +74,8 @@ void VKCommandBuilder::groups_extract_barriers(VKRenderGraph &render_graph,
node_pre_barriers_.resize(node_handles.size());
/* Keep track of the post barriers that needs to be added. The pre barriers will be stored
* directly in `barrier_list_` but may not mingle with the pre barriers. Most barriers are group
* pre barriers. */
* directly in `barrier_list_` but may not mingle with the pre barriers. Most barriers are
* group pre barriers. */
Vector<Barrier> post_barriers;
/* Keep track of the node pre barriers that needs to be added. The pre barriers will be stored
* directly in `barrier_list_` but may not mingle with the group barriers. */
@@ -157,8 +160,8 @@ void VKCommandBuilder::groups_extract_barriers(VKRenderGraph &render_graph,
barrier_list_.append(barrier);
}
/* Resume layered tracking. Each layer that has an override will be transition back to the
* layer specific image layout. */
/* Resume layered tracking. Each layer that has an override will be transition back to
* the layer specific image layout. */
barrier = {};
layered_tracker.resume(barrier, use_local_read);
if (!barrier.is_empty()) {

View File

@@ -177,21 +177,23 @@ class VKCommandBuilder {
public:
/**
* Build the commands of the nodes provided by the `node_handles` parameter. The commands are
* recorded into the given `command_buffer`.
*
* Pre-condition:
* - `command_buffer` must not be in initial state according to
* https://docs.vulkan.org/spec/latest/chapters/cmdbuffers.html#commandbuffers-lifecycle
*
* Post-condition:
* - `command_buffer` will be in executable state according to
* https://docs.vulkan.org/spec/latest/chapters/cmdbuffers.html#commandbuffers-lifecycle
* Build execution groups and barriers.
* This method should be performed when the resources are locked.
*/
void build_nodes(VKRenderGraph &render_graph,
VKCommandBufferInterface &command_buffer,
Span<NodeHandle> node_handles);
/**
* Record commands of the nodes provided by the `node_handles` parameter. The commands are
* recorded into the given `command_buffer`.
*
* `build_nodes` needs to be called before calling this method, with exactly the same parameters.
*/
void record_commands(VKRenderGraph &render_graph,
VKCommandBufferInterface &command_buffer,
Span<NodeHandle> node_handles);
private:
/**
* Split the node_handles in logical groups.

View File

@@ -13,20 +13,13 @@
namespace blender::gpu::render_graph {
VKRenderGraph::VKRenderGraph(std::unique_ptr<VKCommandBufferInterface> command_buffer,
VKResourceStateTracker &resources)
: command_buffer_(std::move(command_buffer)), resources_(resources)
VKRenderGraph::VKRenderGraph(VKResourceStateTracker &resources) : resources_(resources)
{
submission_id.reset();
}
void VKRenderGraph::remove_nodes(Span<NodeHandle> node_handles)
void VKRenderGraph::reset()
{
UNUSED_VARS_NDEBUG(node_handles);
BLI_assert_msg(node_handles.size() == nodes_.size(),
"Currently only supporting removing all nodes. The VKScheduler doesn't walk the "
"nodes, and will use incorrect ordering when not all nodes are removed. This "
"needs to be fixed when implementing a better scheduler.");
links_.clear();
for (VKRenderGraphNode &node : nodes_) {
node.free_data(storage_);
@@ -40,68 +33,6 @@ void VKRenderGraph::remove_nodes(Span<NodeHandle> node_handles)
/** \} */
/* -------------------------------------------------------------------- */
/** \name Submit graph
* \{ */
void VKRenderGraph::submit_for_present(VkImage vk_swapchain_image)
{
/* Needs to be executed at forehand as `add_node` also locks the mutex. */
VKSynchronizationNode::CreateInfo synchronization = {};
synchronization.vk_image = vk_swapchain_image;
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
add_node<VKSynchronizationNode>(synchronization);
std::scoped_lock lock(resources_.mutex);
Span<NodeHandle> node_handles = scheduler_.select_nodes(*this);
command_builder_.build_nodes(*this, *command_buffer_, node_handles);
/* TODO: To improve performance it could be better to return a semaphore. This semaphore can be
* passed in the swapchain to ensure GPU synchronization. This also require a second semaphore to
* pause drawing until the swapchain has completed its drawing phase.
*
* Currently using CPU synchronization for safety. */
command_buffer_->submit_with_cpu_synchronization();
submission_id.next();
remove_nodes(node_handles);
command_buffer_->wait_for_cpu_synchronization();
}
void VKRenderGraph::submit_for_read()
{
std::scoped_lock lock(resources_.mutex);
Span<NodeHandle> node_handles = scheduler_.select_nodes(*this);
command_builder_.build_nodes(*this, *command_buffer_, node_handles);
command_buffer_->submit_with_cpu_synchronization();
submission_id.next();
remove_nodes(node_handles);
command_buffer_->wait_for_cpu_synchronization();
}
void VKRenderGraph::submit()
{
/* Using `VK_NULL_HANDLE` will select the default VkFence of the command buffer. */
submit_synchronization_event(VK_NULL_HANDLE);
wait_synchronization_event(VK_NULL_HANDLE);
}
void VKRenderGraph::submit_synchronization_event(VkFence vk_fence)
{
std::scoped_lock lock(resources_.mutex);
Span<NodeHandle> node_handles = scheduler_.select_nodes(*this);
command_builder_.build_nodes(*this, *command_buffer_, node_handles);
command_buffer_->submit_with_cpu_synchronization(vk_fence);
submission_id.next();
remove_nodes(node_handles);
}
void VKRenderGraph::wait_synchronization_event(VkFence vk_fence)
{
command_buffer_->wait_for_cpu_synchronization(vk_fence);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Debug
* \{ */

View File

@@ -75,22 +75,6 @@ class VKRenderGraph : public NonCopyable {
/** Storage for large node datas to improve CPU cache pre-loading. */
VKRenderGraphStorage storage_;
/** Scheduler decides which nodes to select and in what order to execute them. */
VKScheduler scheduler_;
/**
* Command builder generated the commands of the nodes and record them into the command buffer.
*/
VKCommandBuilder command_builder_;
/**
* Command buffer sends the commands to the device (`VKCommandBufferWrapper`).
*
* To improve testability the command buffer can be replaced by an instance of
* `VKCommandBufferLog` this way test cases don't need to create a fully working context in order
* to test something render graph specific.
*/
std::unique_ptr<VKCommandBufferInterface> command_buffer_;
/**
* Not owning pointer to device resources.
*
@@ -117,11 +101,16 @@ class VKRenderGraph : public NonCopyable {
/** Current stack of debug group names. */
Vector<DebugGroupNameID> group_stack;
/** Has a node been added to the current stack? If not the group stack will be added to
* used_groups. */
/**
* Has a node been added to the current stack? If not the group stack will be added to
* used_groups.
*/
bool group_used = false;
/** All used debug groups. */
Vector<Vector<DebugGroupNameID>> used_groups;
/**
* Map of a node_handle to an index of debug group in used_groups.
*
@@ -142,8 +131,7 @@ class VKRenderGraph : public NonCopyable {
* To improve testability the command buffer and resources they work on are provided as a
* parameter.
*/
VKRenderGraph(std::unique_ptr<VKCommandBufferInterface> command_buffer,
VKResourceStateTracker &resources);
VKRenderGraph(VKResourceStateTracker &resources);
private:
/**
@@ -214,41 +202,9 @@ class VKRenderGraph : public NonCopyable {
ADD_NODE(VKResetQueryPoolNode)
ADD_NODE(VKUpdateBufferNode)
ADD_NODE(VKUpdateMipmapsNode)
ADD_NODE(VKSynchronizationNode)
#undef ADD_NODE
/**
* Submit partial graph to be able to read the expected result of the rendering commands
* affecting the given vk_buffer. This method is called from
* `GPU_texture/storagebuf/indexbuf/vertbuf/_read`. In vulkan the content of images cannot be
* read directly and always needs to be copied to a transfer buffer.
*
* After calling this function the mapped memory of the vk_buffer would contain the data of the
* buffer.
*/
void submit_for_read();
/**
* Submit partial graph to be able to present the expected result of the rendering commands
* affecting the given vk_swapchain_image. This method is called when performing a
* swap chain swap.
*
* Pre conditions:
* - `vk_swapchain_image` needs to be registered in VKResourceStateTracker.
*
* Post conditions:
* - `vk_swapchain_image` layout is transitioned to `VK_IMAGE_LAYOUT_SRC_PRESENT`.
*/
void submit_for_present(VkImage vk_swapchain_image);
/**
* Submit full graph.
*/
void submit();
/** Submit render graph with CPU synchronization event. */
void submit_synchronization_event(VkFence vk_fence);
/** Wait and reset for a CPU synchronization event. */
void wait_synchronization_event(VkFence vk_fence);
/**
* Push a new debugging group to the stack with the given name.
*
@@ -289,8 +245,12 @@ class VKRenderGraph : public NonCopyable {
void debug_print(NodeHandle node_handle) const;
/**
* Reset the render graph.
*/
void reset();
private:
void remove_nodes(Span<NodeHandle> node_handles);
};
} // namespace blender::gpu::render_graph

View File

@@ -22,7 +22,6 @@ Span<NodeHandle> VKScheduler::select_nodes(const VKRenderGraph &render_graph)
for (NodeHandle node_handle : render_graph.nodes_.index_range()) {
result_.append(node_handle);
}
reorder_nodes(render_graph);
return result_;
}

View File

@@ -456,7 +456,7 @@ void VKBackend::compute_dispatch(int groups_x_len, int groups_y_len, int groups_
dispatch_info.dispatch_node.group_count_x = groups_x_len;
dispatch_info.dispatch_node.group_count_y = groups_y_len;
dispatch_info.dispatch_node.group_count_z = groups_z_len;
context.render_graph.add_node(dispatch_info);
context.render_graph().add_node(dispatch_info);
}
void VKBackend::compute_dispatch_indirect(StorageBuf *indirect_buf)
@@ -469,7 +469,7 @@ void VKBackend::compute_dispatch_indirect(StorageBuf *indirect_buf)
context.update_pipeline_data(dispatch_indirect_info.dispatch_indirect_node.pipeline_data);
dispatch_indirect_info.dispatch_indirect_node.buffer = indirect_buffer.vk_handle();
dispatch_indirect_info.dispatch_indirect_node.offset = 0;
context.render_graph.add_node(dispatch_indirect_info);
context.render_graph().add_node(dispatch_indirect_info);
}
Context *VKBackend::context_alloc(void *ghost_window, void *ghost_context)
@@ -484,7 +484,7 @@ Context *VKBackend::context_alloc(void *ghost_window, void *ghost_context)
device.init(ghost_context);
}
VKContext *context = new VKContext(ghost_window, ghost_context, device.resources);
VKContext *context = new VKContext(ghost_window, ghost_context);
device.context_register(*context);
GHOST_SetVulkanSwapBuffersCallbacks((GHOST_ContextHandle)ghost_context,
VKContext::swap_buffers_pre_callback,
@@ -564,23 +564,9 @@ void VKBackend::render_end()
if (thread_data.rendering_depth == 0) {
VKContext *context = VKContext::get();
if (context != nullptr) {
context->flush_render_graph();
context->flush_render_graph(RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
}
thread_data.resource_pool_next();
VKResourcePool &resource_pool = thread_data.resource_pool_get();
resource_pool.discard_pool.destroy_discarded_resources(device);
resource_pool.reset();
}
}
else if (!BLI_thread_is_main()) {
/* Foreground rendering using a worker/render thread. In this case we move the resources to the
* device discard list and it will be cleared by the main thread. */
if (thread_data.rendering_depth == 0) {
VKResourcePool &resource_pool = thread_data.resource_pool_get();
device.orphaned_data.move_data(resource_pool.discard_pool);
resource_pool.reset();
device.orphaned_data.destroy_discarded_resources(device);
}
}
}

View File

@@ -47,7 +47,7 @@ void VKBatch::draw(int vertex_first, int vertex_count, int instance_first, int i
vao.bind(draw_indexed.node_data.vertex_buffers);
context.update_pipeline_data(prim_type, vao, draw_indexed.node_data.pipeline_data);
context.render_graph.add_node(draw_indexed);
context.render_graph().add_node(draw_indexed);
}
else {
render_graph::VKDrawNode::CreateInfo draw(resource_access_info);
@@ -58,7 +58,7 @@ void VKBatch::draw(int vertex_first, int vertex_count, int instance_first, int i
vao.bind(draw.node_data.vertex_buffers);
context.update_pipeline_data(prim_type, vao, draw.node_data.pipeline_data);
context.render_graph.add_node(draw);
context.render_graph().add_node(draw);
}
}
@@ -108,7 +108,7 @@ void VKBatch::multi_draw_indirect(const VkBuffer indirect_buffer,
vao.bind(draw_indexed_indirect.node_data.vertex_buffers);
context.update_pipeline_data(prim_type, vao, draw_indexed_indirect.node_data.pipeline_data);
context.render_graph.add_node(draw_indexed_indirect);
context.render_graph().add_node(draw_indexed_indirect);
}
else {
render_graph::VKDrawIndirectNode::CreateInfo draw(resource_access_info);
@@ -119,7 +119,7 @@ void VKBatch::multi_draw_indirect(const VkBuffer indirect_buffer,
vao.bind(draw.node_data.vertex_buffers);
context.update_pipeline_data(prim_type, vao, draw.node_data.pipeline_data);
context.render_graph.add_node(draw);
context.render_graph().add_node(draw);
}
}

View File

@@ -90,7 +90,7 @@ void VKBuffer::update_render_graph(VKContext &context, void *data) const
update_buffer.dst_buffer = vk_buffer_;
update_buffer.data_size = size_in_bytes_;
update_buffer.data = data;
context.render_graph.add_node(update_buffer);
context.render_graph().add_node(update_buffer);
}
void VKBuffer::flush() const
@@ -106,7 +106,7 @@ void VKBuffer::clear(VKContext &context, uint32_t clear_value)
fill_buffer.vk_buffer = vk_buffer_;
fill_buffer.data = clear_value;
fill_buffer.size = size_in_bytes_;
context.render_graph.add_node(fill_buffer);
context.render_graph().add_node(fill_buffer);
}
void VKBuffer::read(VKContext &context, void *data) const
@@ -114,7 +114,9 @@ void VKBuffer::read(VKContext &context, void *data) const
BLI_assert_msg(is_mapped(), "Cannot read a non-mapped buffer.");
context.rendering_end();
context.descriptor_set_get().upload_descriptor_sets();
context.render_graph.submit_for_read();
context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
memcpy(data, mapped_memory_, size_in_bytes_);
}
@@ -153,8 +155,7 @@ bool VKBuffer::free()
unmap();
}
VKDevice &device = VKBackend::get().device;
device.discard_pool_for_current_thread().discard_buffer(vk_buffer_, allocation_);
VKDiscardPool::discard_pool_get().discard_buffer(vk_buffer_, allocation_);
allocation_ = VK_NULL_HANDLE;
vk_buffer_ = VK_NULL_HANDLE;

View File

@@ -25,6 +25,8 @@
namespace blender::gpu {
using TimelineValue = uint64_t;
/**
* Based on the usage of an Image View a different image view type should be created.
*

View File

@@ -22,12 +22,7 @@
namespace blender::gpu {
VKContext::VKContext(void *ghost_window,
void *ghost_context,
render_graph::VKResourceStateTracker &resources)
: render_graph(std::make_unique<render_graph::VKCommandBufferWrapper>(
VKBackend::get().device.workarounds_get()),
resources)
VKContext::VKContext(void *ghost_window, void *ghost_context)
{
ghost_window_ = ghost_window;
ghost_context_ = ghost_context;
@@ -58,7 +53,6 @@ VKContext::~VKContext()
void VKContext::sync_backbuffer(bool cycle_resource_pool)
{
VKDevice &device = VKBackend::get().device;
if (ghost_window_) {
GHOST_VulkanSwapChainData swap_chain_data = {};
GHOST_GetVulkanSwapChainFormat((GHOST_WindowHandle)ghost_window_, &swap_chain_data);
@@ -67,9 +61,6 @@ void VKContext::sync_backbuffer(bool cycle_resource_pool)
thread_data.resource_pool_next();
VKResourcePool &resource_pool = thread_data.resource_pool_get();
imm = &resource_pool.immediate;
resource_pool.discard_pool.destroy_discarded_resources(device);
resource_pool.reset();
resource_pool.discard_pool.move_data(device.orphaned_data);
}
const bool reset_framebuffer = swap_chain_format_.format !=
@@ -106,11 +97,6 @@ void VKContext::sync_backbuffer(bool cycle_resource_pool)
vk_extent_ = swap_chain_data.extent;
}
}
#if 0
else (is_background) {
discard all orphaned data
}
#endif
}
void VKContext::activate()
@@ -122,6 +108,14 @@ void VKContext::activate()
VKThreadData &thread_data = device.current_thread_data();
thread_data_ = std::reference_wrapper<VKThreadData>(thread_data);
if (!render_graph_.has_value()) {
render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
*device.render_graph_new());
for (const StringRef &group : debug_stack) {
debug_group_begin(std::string(group).c_str(), 0);
}
}
imm = &thread_data.resource_pool_get().immediate;
is_active_ = true;
@@ -133,24 +127,26 @@ void VKContext::activate()
void VKContext::deactivate()
{
flush_render_graph();
flush_render_graph(RenderGraphFlushFlags(0));
immDeactivate();
imm = nullptr;
thread_data_.reset();
is_active_ = false;
}
void VKContext::begin_frame() {}
void VKContext::end_frame() {}
void VKContext::end_frame()
{
  /* Frame boundary: flush the device-level discard pool so resources that were
   * orphaned during the frame are actually destroyed. */
  VKDevice &device = VKBackend::get().device;
  device.orphaned_data.destroy_discarded_resources(device);
}
void VKContext::flush() {}
void VKContext::flush_render_graph()
TimelineValue VKContext::flush_render_graph(RenderGraphFlushFlags flags)
{
if (render_graph.is_empty()) {
return;
}
if (has_active_framebuffer()) {
VKFrameBuffer &framebuffer = *active_framebuffer_get();
if (framebuffer.is_rendering()) {
@@ -158,7 +154,21 @@ void VKContext::flush_render_graph()
}
}
descriptor_set_get().upload_descriptor_sets();
render_graph.submit();
VKDevice &device = VKBackend::get().device;
TimelineValue timeline = device.render_graph_submit(
&render_graph_.value().get(),
discard_pool,
bool(flags & RenderGraphFlushFlags::SUBMIT),
bool(flags & RenderGraphFlushFlags::WAIT_FOR_COMPLETION));
render_graph_.reset();
if (bool(flags & RenderGraphFlushFlags::RENEW_RENDER_GRAPH)) {
render_graph_ = std::reference_wrapper<render_graph::VKRenderGraph>(
*device.render_graph_new());
for (const StringRef &group : debug_stack) {
debug_group_begin(std::string(group).c_str(), 0);
}
}
return timeline;
}
void VKContext::finish() {}
@@ -356,10 +366,18 @@ void VKContext::swap_buffers_pre_handler(const GHOST_VulkanSwapChainData &swap_c
device.resources.add_image(swap_chain_data.image, 1, "SwapchainImage");
framebuffer.rendering_end(*this);
render_graph::VKRenderGraph &render_graph = this->render_graph();
render_graph.add_node(blit_image);
GPU_debug_group_end();
descriptor_set_get().upload_descriptor_sets();
render_graph.submit_for_present(swap_chain_data.image);
render_graph::VKSynchronizationNode::CreateInfo synchronization = {};
synchronization.vk_image = swap_chain_data.image;
synchronization.vk_image_layout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
synchronization.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT;
render_graph.add_node(synchronization);
flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
device.resources.remove_image(swap_chain_data.image);
#if 0
device.debug_print();

View File

@@ -8,6 +8,8 @@
#pragma once
#include "BLI_utildefines.h"
#include "gpu_context_private.hh"
#include "GHOST_Types.h"
@@ -26,6 +28,14 @@ class VKStateManager;
class VKShader;
class VKThreadData;
/**
 * Flags controlling how `VKContext::flush_render_graph` hands the current render graph
 * over to the device.
 *
 * Scoped enum (`enum class`) so the generic enumerator names (`NONE`, `SUBMIT`, ...) do
 * not leak into the enclosing namespace; all call sites already use the qualified form
 * and explicit conversions (`bool(flags & ...)`, `RenderGraphFlushFlags(0)`).
 */
enum class RenderGraphFlushFlags {
  NONE = 0,
  /** Acquire a fresh render graph from the device after the current one is handed off. */
  RENEW_RENDER_GRAPH = 1 << 0,
  /** Submit the recorded command buffer to the device queue. */
  SUBMIT = 1 << 1,
  /** Block until the submitted work has finished executing on the GPU. */
  WAIT_FOR_COMPLETION = 1 << 2,
};
ENUM_OPERATORS(RenderGraphFlushFlags, RenderGraphFlushFlags::WAIT_FOR_COMPLETION);
class VKContext : public Context, NonCopyable {
private:
VkExtent2D vk_extent_ = {};
@@ -37,13 +47,21 @@ class VKContext : public Context, NonCopyable {
render_graph::VKResourceAccessInfo access_info_ = {};
std::optional<std::reference_wrapper<VKThreadData>> thread_data_;
std::optional<std::reference_wrapper<render_graph::VKRenderGraph>> render_graph_;
public:
render_graph::VKRenderGraph render_graph;
VKDiscardPool discard_pool;
VKContext(void *ghost_window,
void *ghost_context,
render_graph::VKResourceStateTracker &resources);
/** Read-only access to the render graph assigned to this context.
 * Only valid while a render graph is assigned (`render_graph_` holds a value),
 * i.e. between activation/renewal and the next flush that does not renew. */
const render_graph::VKRenderGraph &render_graph() const
{
  return render_graph_.value().get();
}
/** Mutable access to the render graph assigned to this context.
 * Only valid while a render graph is assigned (`render_graph_` holds a value). */
render_graph::VKRenderGraph &render_graph()
{
  return render_graph_.value().get();
}
VKContext(void *ghost_window, void *ghost_context);
virtual ~VKContext();
void activate() override;
@@ -52,7 +70,8 @@ class VKContext : public Context, NonCopyable {
void end_frame() override;
void flush() override;
void flush_render_graph();
TimelineValue flush_render_graph(RenderGraphFlushFlags flags);
void finish() override;
void memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb) override;

View File

@@ -21,17 +21,18 @@ static CLG_LogRef LOG = {"gpu.vulkan"};
namespace blender::gpu {
void VKContext::debug_group_begin(const char *name, int)
{
render_graph.debug_group_begin(name, debug::get_debug_group_color(name));
render_graph().debug_group_begin(name, debug::get_debug_group_color(name));
}
void VKContext::debug_group_end()
{
render_graph.debug_group_end();
render_graph().debug_group_end();
}
bool VKContext::debug_capture_begin(const char *title)
{
flush_render_graph();
flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
return VKBackend::get().debug_capture_begin(title);
}
@@ -51,7 +52,8 @@ bool VKBackend::debug_capture_begin(const char *title)
void VKContext::debug_capture_end()
{
flush_render_graph();
flush_render_graph(RenderGraphFlushFlags::SUBMIT | RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
VKBackend::get().debug_capture_end();
}

View File

@@ -37,6 +37,9 @@ void VKDevice::deinit()
if (!is_initialized()) {
return;
}
lifetime = Lifetime::DEINITIALIZING;
deinit_submission_pool();
dummy_buffer.free();
samplers_.free();
@@ -52,9 +55,15 @@ void VKDevice::deinit()
pipelines.write_to_disk();
pipelines.free_data();
descriptor_set_layouts_.deinit();
orphaned_data.deinit(*this);
vmaDestroyAllocator(mem_allocator_);
mem_allocator_ = VK_NULL_HANDLE;
while (!render_graphs_.is_empty()) {
render_graph::VKRenderGraph *render_graph = render_graphs_.pop_last();
MEM_delete<render_graph::VKRenderGraph>(render_graph);
}
debugging_tools_.deinit(vk_instance_);
vk_instance_ = VK_NULL_HANDLE;
@@ -64,11 +73,12 @@ void VKDevice::deinit()
vk_queue_ = VK_NULL_HANDLE;
vk_physical_device_properties_ = {};
glsl_patch_.clear();
lifetime = Lifetime::DESTROYED;
}
bool VKDevice::is_initialized() const
{
return vk_device_ != VK_NULL_HANDLE;
return lifetime == Lifetime::RUNNING;
}
void VKDevice::init(void *ghost_context)
@@ -105,6 +115,10 @@ void VKDevice::init(void *ghost_context)
resources.use_dynamic_rendering = !workarounds_.dynamic_rendering;
resources.use_dynamic_rendering_local_read = !workarounds_.dynamic_rendering_local_read;
orphaned_data.timeline_ = timeline_value_ + 1;
init_submission_pool();
lifetime = Lifetime::RUNNING;
}
void VKDevice::init_functions()
@@ -349,6 +363,214 @@ std::string VKDevice::driver_version() const
/** \} */
/* -------------------------------------------------------------------- */
/** \name Render graph
* \{ */
/**
 * Payload pushed onto `VKDevice::submitted_render_graphs_` by `render_graph_submit` and
 * consumed by the background `submission_runner` thread.
 */
struct VKRenderGraphSubmitTask {
  /** Render graph to record; recycled via `unused_render_graphs_` after recording. */
  render_graph::VKRenderGraph *render_graph;
  /** Timeline value signaled on the device timeline semaphore when this task's
   * submission finishes on the GPU. */
  uint64_t timeline;
  /** When false the commands are only recorded, not submitted to the queue yet. */
  bool submit_to_device;
};
/**
 * Hand a render graph over to the submission runner thread.
 *
 * \param render_graph: graph to submit; ownership transfers to the runner, which recycles
 *        it into `unused_render_graphs_` when done. The caller must not use it afterwards.
 * \param context_discard_pool: discarded resources of the submitting context; moved into
 *        the device `orphaned_data` tagged with this submission's timeline.
 * \param submit_to_device: when true the recorded commands are submitted to the queue.
 * \param wait_for_completion: when true this call blocks until the GPU finished the work.
 * \return the timeline value of this submission, or 0 when the graph was empty.
 */
TimelineValue VKDevice::render_graph_submit(render_graph::VKRenderGraph *render_graph,
                                            VKDiscardPool &context_discard_pool,
                                            bool submit_to_device,
                                            bool wait_for_completion)
{
  /* Nothing to record: recycle the graph immediately. */
  if (render_graph->is_empty()) {
    render_graph->reset();
    BLI_thread_queue_push(unused_render_graphs_, render_graph);
    return 0;
  }

  VKRenderGraphSubmitTask *submit_task = MEM_new<VKRenderGraphSubmitTask>(__func__);
  submit_task->render_graph = render_graph;
  submit_task->submit_to_device = submit_to_device;
  /* Only an actual device submission consumes a new timeline value; a record-only task is
   * tagged with the value the next real submission will use. */
  TimelineValue timeline = submit_task->timeline = submit_to_device ? ++timeline_value_ :
                                                                      timeline_value_ + 1;
  /* NOTE(review): `orphaned_data.timeline_` is written here without holding its mutex;
   * presumably safe because only submitting threads advance it — confirm. */
  orphaned_data.timeline_ = timeline + 1;
  orphaned_data.move_data(context_discard_pool, timeline);
  BLI_thread_queue_push(submitted_render_graphs_, submit_task);
  /* Ownership moved to the submission runner; don't touch the task after this point. */
  submit_task = nullptr;

  if (wait_for_completion) {
    wait_for_timeline(timeline);
  }
  return timeline;
}
/**
 * Block the calling thread until the device timeline semaphore reaches `timeline`,
 * i.e. the submission tagged with that value has finished executing on the GPU.
 * A timeline of 0 means "nothing was submitted" and returns immediately.
 */
void VKDevice::wait_for_timeline(TimelineValue timeline)
{
  if (timeline == 0) {
    return;
  }
  VkSemaphoreWaitInfo vk_semaphore_wait_info = {
      VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, nullptr, 0, 1, &vk_timeline_semaphore_, &timeline};
  vkWaitSemaphores(vk_device_, &vk_semaphore_wait_info, UINT64_MAX);
}
/**
 * Hand out a render graph for a context to build nodes into.
 *
 * Reuses a graph from the unused queue when one is available; otherwise a new graph is
 * allocated and tracked in `render_graphs_` for destruction during `deinit`.
 */
render_graph::VKRenderGraph *VKDevice::render_graph_new()
{
  /* Fast path: reuse a render graph a previous submission returned to the pool. */
  void *recycled = BLI_thread_queue_pop_timeout(unused_render_graphs_, 0);
  if (recycled != nullptr) {
    return static_cast<render_graph::VKRenderGraph *>(recycled);
  }

  /* Slow path: allocate a fresh render graph. The resources mutex guards the shared
   * resource state tracker handed to the new graph. */
  std::scoped_lock lock(resources.mutex);
  render_graph::VKRenderGraph *created = MEM_new<render_graph::VKRenderGraph>(__func__,
                                                                              resources);
  render_graphs_.append(created);
  return created;
}
/**
 * Entry point of the single background thread that turns submitted render graphs into
 * recorded Vulkan command buffers and (optionally) submits them to the device queue.
 *
 * Loops until `lifetime` reaches `DEINITIALIZING` (set by `VKDevice::deinit`), then
 * frees its command buffers and command pool.
 */
void VKDevice::submission_runner(TaskPool *__restrict pool, void *task_data)
{
  UNUSED_VARS(task_data);
  VKDevice *device = static_cast<VKDevice *>(BLI_task_pool_user_data(pool));

  /* Command pool owned by this thread; all command buffers below are allocated from it. */
  VkCommandPool vk_command_pool = VK_NULL_HANDLE;
  VkCommandPoolCreateInfo vk_command_pool_create_info = {
      VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      nullptr,
      VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
      device->vk_queue_family_};
  vkCreateCommandPool(device->vk_device_, &vk_command_pool_create_info, nullptr, &vk_command_pool);

  render_graph::VKScheduler scheduler;
  render_graph::VKCommandBuilder command_builder;
  /* Command buffers ready for reuse, and command buffers still referenced by in-flight
   * submissions keyed by their timeline value. */
  Vector<VkCommandBuffer> command_buffers_unused;
  TimelineResources<VkCommandBuffer> command_buffers_in_use;
  VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE;
  std::optional<render_graph::VKCommandBufferWrapper> command_buffer;

  while (device->lifetime < Lifetime::DEINITIALIZING) {
    /* Pop with a timeout so the lifetime condition above is re-evaluated while idle. */
    VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
        BLI_thread_queue_pop_timeout(device->submitted_render_graphs_, 1));
    if (submit_task == nullptr) {
      continue;
    }

    /* Ensure there is an open command buffer to record into. Record-only tasks keep the
     * current command buffer open so multiple render graphs can share one submission. */
    if (!command_buffer.has_value()) {
      /* Check for completed command buffers that can be reused. */
      if (command_buffers_unused.is_empty()) {
        uint64_t current_timeline = device->submission_finished_timeline_get();
        command_buffers_in_use.remove_old(current_timeline,
                                          [&](VkCommandBuffer vk_command_buffer) {
                                            command_buffers_unused.append(vk_command_buffer);
                                          });
      }
      /* Create new command buffers when there are no left to be reused. */
      if (command_buffers_unused.is_empty()) {
        command_buffers_unused.resize(10, VK_NULL_HANDLE);
        VkCommandBufferAllocateInfo vk_command_buffer_allocate_info = {
            VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
            nullptr,
            vk_command_pool,
            VK_COMMAND_BUFFER_LEVEL_PRIMARY,
            10};
        vkAllocateCommandBuffers(
            device->vk_device_, &vk_command_buffer_allocate_info, command_buffers_unused.data());
      };
      vk_command_buffer = command_buffers_unused.pop_last();
      command_buffer = std::make_optional<render_graph::VKCommandBufferWrapper>(
          vk_command_buffer, device->workarounds_);
      command_buffer->begin_recording();
    }

    BLI_assert(vk_command_buffer != VK_NULL_HANDLE);
    render_graph::VKRenderGraph &render_graph = *submit_task->render_graph;
    Span<render_graph::NodeHandle> node_handles;
    {
      /* Scheduling and building nodes touch the shared resource states; keep this lock
       * short so other threads can keep building their render graphs. */
      std::scoped_lock lock_resources(device->resources.mutex);
      node_handles = scheduler.select_nodes(render_graph);
      command_builder.build_nodes(render_graph, *command_buffer, node_handles);
    }
    /* Recording the prepared commands no longer needs the resource lock. */
    command_builder.record_commands(render_graph, *command_buffer, node_handles);

    if (submit_task->submit_to_device) {
      command_buffer->end_recording();
      /* Signal the device timeline semaphore with the task's timeline value when the GPU
       * finishes, unblocking `wait_for_timeline` and command buffer reuse above. */
      VkTimelineSemaphoreSubmitInfo vk_timeline_semaphore_submit_info = {
          VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
          nullptr,
          0,
          nullptr,
          1,
          &submit_task->timeline};
      VkSubmitInfo vk_submit_info = {VK_STRUCTURE_TYPE_SUBMIT_INFO,
                                     &vk_timeline_semaphore_submit_info,
                                     0,
                                     nullptr,
                                     nullptr,
                                     1,
                                     &vk_command_buffer,
                                     1,
                                     &device->vk_timeline_semaphore_};
      {
        std::scoped_lock lock_queue(*device->queue_mutex_);
        vkQueueSubmit(device->vk_queue_, 1, &vk_submit_info, VK_NULL_HANDLE);
      }
      /* The command buffer stays in use until its timeline value has been reached. */
      command_buffers_in_use.append_timeline(submit_task->timeline, vk_command_buffer);
      vk_command_buffer = VK_NULL_HANDLE;
      command_buffer.reset();
    }

    /* Recycle the render graph so a next context/thread can reuse it. */
    render_graph.reset();
    BLI_thread_queue_push(device->unused_render_graphs_, std::move(submit_task->render_graph));
    MEM_delete<VKRenderGraphSubmitTask>(submit_task);
  }

  /* Clear command buffers and pool */
  vkDeviceWaitIdle(device->vk_device_);
  command_buffers_in_use.remove_old(UINT64_MAX, [&](VkCommandBuffer vk_command_buffer) {
    command_buffers_unused.append(vk_command_buffer);
  });
  vkFreeCommandBuffers(device->vk_device_,
                       vk_command_pool,
                       command_buffers_unused.size(),
                       command_buffers_unused.data());
  vkDestroyCommandPool(device->vk_device_, vk_command_pool, nullptr);
}
/**
 * Create the submission infrastructure: the submitted/unused render graph queues, the
 * device timeline semaphore and the background task that records and submits render
 * graphs (`submission_runner`).
 *
 * The queues and the semaphore MUST exist before the runner task is pushed: `lifetime`
 * is still `UNINITIALIZED` (< `DEINITIALIZING`) at this point, so the runner enters its
 * loop immediately and would dereference `submitted_render_graphs_` / poll the timeline
 * semaphore before they were created.
 */
void VKDevice::init_submission_pool()
{
  submitted_render_graphs_ = BLI_thread_queue_init();
  unused_render_graphs_ = BLI_thread_queue_init();

  /* Timeline semaphore used to track which submissions have finished on the GPU. */
  VkSemaphoreTypeCreateInfo vk_semaphore_type_create_info = {
      VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, nullptr, VK_SEMAPHORE_TYPE_TIMELINE, 0};
  VkSemaphoreCreateInfo vk_semaphore_create_info = {
      VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, &vk_semaphore_type_create_info, 0};
  vkCreateSemaphore(vk_device_, &vk_semaphore_create_info, nullptr, &vk_timeline_semaphore_);

  /* Start the single background thread last, when everything it uses is in place. */
  submission_pool_ = BLI_task_pool_create_background_serial(this, TASK_PRIORITY_HIGH);
  BLI_task_pool_push(submission_pool_, VKDevice::submission_runner, nullptr, false, nullptr);
}
/**
 * Tear down the submission infrastructure. Called from `VKDevice::deinit` after
 * `lifetime` was set to `DEINITIALIZING`, which makes the runner loop exit.
 */
void VKDevice::deinit_submission_pool()
{
  /* Freeing the task pool waits for the submission runner task to finish. */
  BLI_task_pool_free(submission_pool_);
  submission_pool_ = nullptr;

  /* Drain tasks that were submitted but never picked up by the runner. */
  while (!BLI_thread_queue_is_empty(submitted_render_graphs_)) {
    VKRenderGraphSubmitTask *submit_task = static_cast<VKRenderGraphSubmitTask *>(
        BLI_thread_queue_pop(submitted_render_graphs_));
    MEM_delete<VKRenderGraphSubmitTask>(submit_task);
  }
  BLI_thread_queue_free(submitted_render_graphs_);
  submitted_render_graphs_ = nullptr;
  /* Render graphs still in this queue are owned by `render_graphs_` and are deleted in
   * `VKDevice::deinit`; only the queue itself is freed here. */
  BLI_thread_queue_free(unused_render_graphs_);
  unused_render_graphs_ = nullptr;

  vkDestroySemaphore(vk_device_, vk_timeline_semaphore_, nullptr);
  vk_timeline_semaphore_ = VK_NULL_HANDLE;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name VKThreadData
* \{ */
@@ -389,6 +611,7 @@ VKThreadData &VKDevice::current_thread_data()
return *thread_data;
}
#if 0
VKDiscardPool &VKDevice::discard_pool_for_current_thread(bool thread_safe)
{
std::unique_lock lock(resources.mutex, std::defer_lock);
@@ -406,6 +629,7 @@ VKDiscardPool &VKDevice::discard_pool_for_current_thread(bool thread_safe)
return orphaned_data;
}
#endif
void VKDevice::context_register(VKContext &context)
{
@@ -414,6 +638,7 @@ void VKDevice::context_register(VKContext &context)
void VKDevice::context_unregister(VKContext &context)
{
orphaned_data.move_data(context.discard_pool, timeline_value_ + 1);
contexts_.remove(contexts_.first_index_of(std::reference_wrapper(context)));
}
Span<std::reference_wrapper<VKContext>> VKDevice::contexts_get() const
@@ -498,29 +723,16 @@ void VKDevice::debug_print()
os << "ThreadData" << (is_main ? " (main-thread)" : "") << ")\n";
os << " Rendering_depth: " << thread_data->rendering_depth << "\n";
for (int resource_pool_index : IndexRange(thread_data->resource_pools.size())) {
const VKResourcePool &resource_pool = thread_data->resource_pools[resource_pool_index];
const bool is_active = thread_data->resource_pool_index == resource_pool_index;
os << " Resource Pool (index=" << resource_pool_index << (is_active ? " active" : "")
<< ")\n";
debug_print(os, resource_pool.discard_pool);
}
}
os << "Orphaned data\n";
os << "Discard pool\n";
debug_print(os, orphaned_data);
os << "\n";
}
void VKDevice::free_command_pool_buffers(VkCommandPool vk_command_pool)
{
std::scoped_lock mutex(resources.mutex);
for (VKThreadData *thread_data : thread_data_) {
for (VKResourcePool &resource_pool : thread_data->resource_pools) {
resource_pool.discard_pool.free_command_pool_buffers(vk_command_pool, *this);
}
}
orphaned_data.free_command_pool_buffers(vk_command_pool, *this);
}
/** \} */
} // namespace blender::gpu

View File

@@ -8,6 +8,10 @@
#pragma once
#include <atomic>
#include "BLI_task.h"
#include "BLI_threads.h"
#include "BLI_utility_mixins.hh"
#include "BLI_vector.hh"
@@ -149,6 +153,35 @@ class VKDevice : public NonCopyable {
VkQueue vk_queue_ = VK_NULL_HANDLE;
std::mutex *queue_mutex_ = nullptr;
/**
* Lifetime of the device.
*
* Used for deinitialization of the command builder thread.
*/
enum Lifetime {
UNINITIALIZED,
RUNNING,
DEINITIALIZING,
DESTROYED,
};
Lifetime lifetime = Lifetime::UNINITIALIZED;
/**
* Task pool for render graph submission.
*
* Multiple threads in Blender can build a render graph. Building the command buffer for a render
* graph is faster when doing it in serial. Submission pool ensures that only one task is
* building at a time (background_serial).
*/
TaskPool *submission_pool_ = nullptr;
/**
* All created render graphs.
*/
Vector<render_graph::VKRenderGraph *> render_graphs_;
ThreadQueue *submitted_render_graphs_ = nullptr;
ThreadQueue *unused_render_graphs_ = nullptr;
VkSemaphore vk_timeline_semaphore_ = VK_NULL_HANDLE;
std::atomic<uint_least64_t> timeline_value_ = 0;
VKSamplers samplers_;
VKDescriptorSetLayouts descriptor_set_layouts_;
@@ -313,6 +346,31 @@ class VKDevice : public NonCopyable {
const char *glsl_patch_get() const;
void init_glsl_patch();
/* -------------------------------------------------------------------- */
/** \name Render graph
* \{ */
static void submission_runner(TaskPool *__restrict pool, void *task_data);
render_graph::VKRenderGraph *render_graph_new();
TimelineValue render_graph_submit(render_graph::VKRenderGraph *render_graph,
VKDiscardPool &context_discard_pool,
bool submit_to_device,
bool wait_for_completion);
void wait_for_timeline(TimelineValue timeline);
/**
* Retrieve the last finished submission timeline.
*/
TimelineValue submission_finished_timeline_get() const
{
  BLI_assert(vk_timeline_semaphore_ != VK_NULL_HANDLE);
  /* The queue signals the timeline semaphore with the submission's timeline value when it
   * finishes, so the current counter identifies the last finished submission. */
  TimelineValue current_timeline;
  vkGetSemaphoreCounterValue(vk_device_, vk_timeline_semaphore_, &current_timeline);
  return current_timeline;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Resource management
* \{ */
@@ -322,6 +380,7 @@ class VKDevice : public NonCopyable {
*/
VKThreadData &current_thread_data();
#if 0
/**
* Get the discard pool for the current thread.
*
@@ -337,6 +396,7 @@ class VKDevice : public NonCopyable {
* function without trying to reacquire resources mutex making a deadlock.
*/
VKDiscardPool &discard_pool_for_current_thread(bool thread_safe = false);
#endif
void context_register(VKContext &context);
void context_unregister(VKContext &context);
@@ -346,8 +406,6 @@ class VKDevice : public NonCopyable {
static void debug_print(std::ostream &os, const VKDiscardPool &discard_pool);
void debug_print();
void free_command_pool_buffers(VkCommandPool vk_command_pool);
/** \} */
private:
@@ -357,6 +415,8 @@ class VKDevice : public NonCopyable {
void init_physical_device_extensions();
void init_debug_callbacks();
void init_memory_allocator();
void init_submission_pool();
void deinit_submission_pool();
/**
* Initialize the functions struct with extension specific function pointer.
*/

View File

@@ -13,39 +13,18 @@
namespace blender::gpu {
VKFence::~VKFence()
{
if (vk_fence_ != VK_NULL_HANDLE) {
VKDevice &device = VKBackend::get().device;
vkDestroyFence(device.vk_handle(), vk_fence_, nullptr);
vk_fence_ = VK_NULL_HANDLE;
}
}
void VKFence::signal()
{
if (vk_fence_ == VK_NULL_HANDLE) {
VKDevice &device = VKBackend::get().device;
VkFenceCreateInfo vk_fence_create_info = {};
vk_fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
vk_fence_create_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
vkCreateFence(device.vk_handle(), &vk_fence_create_info, nullptr, &vk_fence_);
}
VKContext &context = *VKContext::get();
context.rendering_end();
context.descriptor_set_get().upload_descriptor_sets();
context.render_graph.submit_synchronization_event(vk_fence_);
signalled_ = true;
timeline_value_ = context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
}
void VKFence::wait()
{
if (!signalled_) {
return;
}
VKContext &context = *VKContext::get();
context.render_graph.wait_synchronization_event(vk_fence_);
signalled_ = false;
VKDevice &device = VKBackend::get().device;
device.wait_for_timeline(timeline_value_);
timeline_value_ = 0;
}
} // namespace blender::gpu

View File

@@ -16,11 +16,7 @@ namespace blender::gpu {
class VKFence : public Fence {
private:
VkFence vk_fence_ = VK_NULL_HANDLE;
bool signalled_ = false;
protected:
virtual ~VKFence();
TimelineValue timeline_value_;
public:
void signal() override;

View File

@@ -47,13 +47,13 @@ VKFrameBuffer::~VKFrameBuffer()
void VKFrameBuffer::render_pass_free()
{
VKDevice &device = VKBackend::get().device;
VKDiscardPool &discard_pool = VKDiscardPool::discard_pool_get();
if (vk_framebuffer != VK_NULL_HANDLE) {
device.discard_pool_for_current_thread().discard_framebuffer(vk_framebuffer);
discard_pool.discard_framebuffer(vk_framebuffer);
vk_framebuffer = VK_NULL_HANDLE;
}
if (vk_render_pass != VK_NULL_HANDLE) {
device.discard_pool_for_current_thread().discard_render_pass(vk_render_pass);
discard_pool.discard_render_pass(vk_render_pass);
vk_render_pass = VK_NULL_HANDLE;
}
}
@@ -202,7 +202,7 @@ void VKFrameBuffer::clear(render_graph::VKClearAttachmentsNode::CreateInfo &clea
{
VKContext &context = *VKContext::get();
rendering_ensure(context);
context.render_graph.add_node(clear_attachments);
context.render_graph().add_node(clear_attachments);
}
void VKFrameBuffer::clear(const eGPUFrameBufferBits buffers,
@@ -475,7 +475,7 @@ static void blit_aspect(VKContext &context,
dst_texture.height_get());
region.dstOffsets[1].z = 1;
context.render_graph.add_node(blit_image);
context.render_graph().add_node(blit_image);
}
void VKFrameBuffer::blit_to(eGPUFrameBufferBits planes,
@@ -780,7 +780,7 @@ void VKFrameBuffer::rendering_ensure_render_pass(VKContext &context)
begin_info.framebuffer = vk_framebuffer;
render_area_update(begin_info.renderArea);
context.render_graph.add_node(begin_rendering);
context.render_graph().add_node(begin_rendering);
/* Load store operations are not supported inside a render pass.
* It requires duplicating render passes and frame-buffers to support suspend/resume rendering.
@@ -813,7 +813,7 @@ void VKFrameBuffer::rendering_ensure_render_pass(VKContext &context)
render_area_update(clear_attachments.vk_clear_rect.rect);
clear_attachments.vk_clear_rect.baseArrayLayer = 0;
clear_attachments.vk_clear_rect.layerCount = 1;
context.render_graph.add_node(clear_attachments);
context.render_graph().add_node(clear_attachments);
}
}
}
@@ -970,7 +970,7 @@ void VKFrameBuffer::rendering_ensure_dynamic_rendering(VKContext &context,
break;
}
context.render_graph.add_node(begin_rendering);
context.render_graph().add_node(begin_rendering);
}
void VKFrameBuffer::rendering_ensure(VKContext &context)
@@ -1030,7 +1030,7 @@ void VKFrameBuffer::rendering_end(VKContext &context)
BLI_assert(vk_render_pass);
end_rendering.vk_render_pass = vk_render_pass;
}
context.render_graph.add_node(end_rendering);
context.render_graph().add_node(end_rendering);
is_rendering_ = false;
}
}

View File

@@ -73,8 +73,7 @@ VKImageView::VKImageView(VKImageView &&other) : info(other.info)
VKImageView::~VKImageView()
{
if (vk_image_view_ != VK_NULL_HANDLE) {
VKDevice &device = VKBackend::get().device;
device.discard_pool_for_current_thread().discard_image_view(vk_image_view_);
VKDiscardPool::discard_pool_get().discard_image_view(vk_image_view_);
vk_image_view_ = VK_NULL_HANDLE;
}
vk_format_ = VK_FORMAT_UNDEFINED;

View File

@@ -117,7 +117,7 @@ void VKImmediate::end()
vertex_attributes_.bind(draw.node_data.vertex_buffers);
context.update_pipeline_data(prim_type, vertex_attributes_, draw.node_data.pipeline_data);
context.render_graph.add_node(draw);
context.render_graph().add_node(draw);
}
buffer_offset_ += current_subbuffer_len_;

View File

@@ -67,13 +67,13 @@ void VKQueryPool::begin_query()
reset_query_pool.vk_query_pool = vk_query_pool;
reset_query_pool.first_query = 0;
reset_query_pool.query_count = query_chunk_len_;
context.render_graph.add_node(reset_query_pool);
context.render_graph().add_node(reset_query_pool);
}
render_graph::VKBeginQueryNode::Data begin_query = {};
begin_query.vk_query_pool = vk_query_pool;
begin_query.query_index = query_index_in_pool();
context.render_graph.add_node(begin_query);
context.render_graph().add_node(begin_query);
}
void VKQueryPool::end_query()
@@ -82,7 +82,7 @@ void VKQueryPool::end_query()
render_graph::VKEndQueryNode::Data end_query = {};
end_query.vk_query_pool = vk_query_pools_.last();
end_query.query_index = query_index_in_pool();
context.render_graph.add_node(end_query);
context.render_graph().add_node(end_query);
queries_issued_ += 1;
}
@@ -93,7 +93,9 @@ void VKQueryPool::get_occlusion_result(MutableSpan<uint32_t> r_values)
* ensure the END_RENDERING node */
context.rendering_end();
context.descriptor_set_get().upload_descriptor_sets();
context.render_graph.submit();
context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
RenderGraphFlushFlags::WAIT_FOR_COMPLETION |
RenderGraphFlushFlags::RENEW_RENDER_GRAPH);
int queries_left = queries_issued_;
int pool_index = 0;

View File

@@ -8,6 +8,7 @@
#include "vk_resource_pool.hh"
#include "vk_backend.hh"
#include "vk_context.hh"
namespace blender::gpu {
@@ -19,7 +20,6 @@ void VKResourcePool::init(VKDevice &device)
void VKResourcePool::deinit(VKDevice &device)
{
immediate.deinit(device);
discard_pool.deinit(device);
}
void VKResourcePool::reset()
@@ -30,13 +30,19 @@ void VKResourcePool::reset()
void VKDiscardPool::deinit(VKDevice &device)
{
destroy_discarded_resources(device);
destroy_discarded_resources(device, true);
}
void VKDiscardPool::move_data(VKDiscardPool &src_pool)
void VKDiscardPool::move_data(VKDiscardPool &src_pool, TimelineValue timeline)
{
std::scoped_lock mutex(mutex_);
std::scoped_lock mutex_src(src_pool.mutex_);
src_pool.buffers_.update_timeline(timeline);
src_pool.image_views_.update_timeline(timeline);
src_pool.images_.update_timeline(timeline);
src_pool.shader_modules_.update_timeline(timeline);
src_pool.pipeline_layouts_.update_timeline(timeline);
src_pool.framebuffers_.update_timeline(timeline);
src_pool.render_passes_.update_timeline(timeline);
buffers_.extend(std::move(src_pool.buffers_));
image_views_.extend(std::move(src_pool.image_views_));
images_.extend(std::move(src_pool.images_));
@@ -44,120 +50,94 @@ void VKDiscardPool::move_data(VKDiscardPool &src_pool)
pipeline_layouts_.extend(std::move(src_pool.pipeline_layouts_));
framebuffers_.extend(std::move(src_pool.framebuffers_));
render_passes_.extend(std::move(src_pool.render_passes_));
for (const Map<VkCommandPool, Vector<VkCommandBuffer>>::Item &item :
src_pool.command_buffers_.items())
{
command_buffers_.lookup_or_add_default(item.key).extend(item.value);
}
src_pool.command_buffers_.clear();
}
/* Queue `vk_image` and its VMA allocation for deferred destruction.
 *
 * The image may still be referenced by submitted GPU work, so it is only recorded
 * here; actual destruction happens later in `destroy_discarded_resources`.
 * Thread-safe: the pool's mutex guards the pending-item vector. */
void VKDiscardPool::discard_image(VkImage vk_image, VmaAllocation vma_allocation)
{
std::scoped_lock mutex(mutex_);
images_.append(std::pair(vk_image, vma_allocation));
}
/* Queue a command buffer for deferred freeing, keyed by the pool it was allocated
 * from — `vkFreeCommandBuffers` requires the owning `VkCommandPool` handle. */
void VKDiscardPool::discard_command_buffer(VkCommandBuffer vk_command_buffer,
VkCommandPool vk_command_pool)
{
std::scoped_lock mutex(mutex_);
command_buffers_.lookup_or_add_default(vk_command_pool).append(vk_command_buffer);
}
/* Free all discarded command buffers that were allocated from `vk_command_pool`.
 *
 * Needed when the pool itself is about to be destroyed: its command buffers must be
 * freed beforehand, ahead of the regular `destroy_discarded_resources` pass
 * (see the comment on the declaration elsewhere in this diff). */
void VKDiscardPool::free_command_pool_buffers(VkCommandPool vk_command_pool, VKDevice &device)
{
std::scoped_lock mutex(mutex_);
/* Atomically take ownership of the pending buffers for this pool, if any exist. */
std::optional<blender::Vector<VkCommandBuffer>> buffers = command_buffers_.pop_try(
vk_command_pool);
if (!buffers) {
return;
}
vkFreeCommandBuffers(device.vk_handle(), vk_command_pool, (*buffers).size(), (*buffers).begin());
/* NOTE(review): stray diff line — the statement below belongs to the post-change body
 * of `discard_image` (it uses `vk_image`/`vma_allocation`, which are not in scope
 * here) and is not part of this function. */
images_.append_timeline(timeline_, std::pair(vk_image, vma_allocation));
}
/* Queue an image view for deferred destruction.
 *
 * NOTE(review): diff artifact — `append` is the pre-change line, `append_timeline`
 * (tagging the item with the pool's current submission timeline) the post-change
 * replacement; only one belongs in the final source. */
void VKDiscardPool::discard_image_view(VkImageView vk_image_view)
{
std::scoped_lock mutex(mutex_);
image_views_.append(vk_image_view);
image_views_.append_timeline(timeline_, vk_image_view);
}
/* Queue `vk_buffer` and its VMA allocation for deferred destruction.
 * NOTE(review): diff artifact — pre-change `append` and post-change `append_timeline`
 * both shown; only one belongs in the final source. */
void VKDiscardPool::discard_buffer(VkBuffer vk_buffer, VmaAllocation vma_allocation)
{
std::scoped_lock mutex(mutex_);
buffers_.append(std::pair(vk_buffer, vma_allocation));
buffers_.append_timeline(timeline_, std::pair(vk_buffer, vma_allocation));
}
/* Queue a shader module for deferred destruction.
 * NOTE(review): diff artifact — pre-change `append` and post-change `append_timeline`
 * both shown; only one belongs in the final source. */
void VKDiscardPool::discard_shader_module(VkShaderModule vk_shader_module)
{
std::scoped_lock mutex(mutex_);
shader_modules_.append(vk_shader_module);
shader_modules_.append_timeline(timeline_, vk_shader_module);
}
/* Queue a pipeline layout for deferred destruction.
 * NOTE(review): diff artifact — pre-change `append` and post-change `append_timeline`
 * both shown; only one belongs in the final source. */
void VKDiscardPool::discard_pipeline_layout(VkPipelineLayout vk_pipeline_layout)
{
std::scoped_lock mutex(mutex_);
pipeline_layouts_.append(vk_pipeline_layout);
pipeline_layouts_.append_timeline(timeline_, vk_pipeline_layout);
}
/* Queue a framebuffer for deferred destruction.
 * NOTE(review): diff artifact — pre-change `append` and post-change `append_timeline`
 * both shown; only one belongs in the final source. */
void VKDiscardPool::discard_framebuffer(VkFramebuffer vk_framebuffer)
{
std::scoped_lock mutex(mutex_);
framebuffers_.append(vk_framebuffer);
framebuffers_.append_timeline(timeline_, vk_framebuffer);
}
/* Queue a render pass for deferred destruction.
 * NOTE(review): diff artifact — pre-change `append` and post-change `append_timeline`
 * both shown; only one belongs in the final source. */
void VKDiscardPool::discard_render_pass(VkRenderPass vk_render_pass)
{
std::scoped_lock mutex(mutex_);
render_passes_.append(vk_render_pass);
render_passes_.append_timeline(timeline_, vk_render_pass);
}
/* Destroy previously discarded resources that the GPU can no longer be using.
 *
 * NOTE(review): diff artifact — this whole span interleaves the pre-change
 * `while (...) { ... pop_last(); ... }` loops with the post-change
 * `remove_old(current_timeline, deleter)` calls (their `+`/`-` markers were stripped
 * by the diff rendering); only one form of each pair belongs in the final source.
 * In the post-change form only items whose timeline tag is <= the last submission
 * known to be finished are destroyed, or everything when `force` is set. */
void VKDiscardPool::destroy_discarded_resources(VKDevice &device)
void VKDiscardPool::destroy_discarded_resources(VKDevice &device, bool force)
{
std::scoped_lock mutex(mutex_);
/* `force` bypasses the timeline check (UINT64_MAX is newer than any submission),
 * so every pending item is destroyed — used during device teardown. */
TimelineValue current_timeline = force ? UINT64_MAX : device.submission_finished_timeline_get();
/* Image views. */
while (!image_views_.is_empty()) {
VkImageView vk_image_view = image_views_.pop_last();
image_views_.remove_old(current_timeline, [&](VkImageView vk_image_view) {
vkDestroyImageView(device.vk_handle(), vk_image_view, nullptr);
}
});
/* Images: deregister from the device resource tracker before the VMA destroy. */
while (!images_.is_empty()) {
std::pair<VkImage, VmaAllocation> image_allocation = images_.pop_last();
images_.remove_old(current_timeline, [&](std::pair<VkImage, VmaAllocation> image_allocation) {
device.resources.remove_image(image_allocation.first);
vmaDestroyImage(device.mem_allocator_get(), image_allocation.first, image_allocation.second);
}
/* Buffers: same pattern as images. */
while (!buffers_.is_empty()) {
std::pair<VkBuffer, VmaAllocation> buffer_allocation = buffers_.pop_last();
});
buffers_.remove_old(current_timeline, [&](std::pair<VkBuffer, VmaAllocation> buffer_allocation) {
device.resources.remove_buffer(buffer_allocation.first);
vmaDestroyBuffer(
device.mem_allocator_get(), buffer_allocation.first, buffer_allocation.second);
}
});
/* Pipeline layouts. */
while (!pipeline_layouts_.is_empty()) {
VkPipelineLayout vk_pipeline_layout = pipeline_layouts_.pop_last();
pipeline_layouts_.remove_old(current_timeline, [&](VkPipelineLayout vk_pipeline_layout) {
vkDestroyPipelineLayout(device.vk_handle(), vk_pipeline_layout, nullptr);
}
});
/* Shader modules. */
while (!shader_modules_.is_empty()) {
VkShaderModule vk_shader_module = shader_modules_.pop_last();
shader_modules_.remove_old(current_timeline, [&](VkShaderModule vk_shader_module) {
vkDestroyShaderModule(device.vk_handle(), vk_shader_module, nullptr);
}
});
/* Framebuffers. */
while (!framebuffers_.is_empty()) {
VkFramebuffer vk_framebuffer = framebuffers_.pop_last();
framebuffers_.remove_old(current_timeline, [&](VkFramebuffer vk_framebuffer) {
vkDestroyFramebuffer(device.vk_handle(), vk_framebuffer, nullptr);
}
});
/* Render passes. */
while (!render_passes_.is_empty()) {
VkRenderPass vk_render_pass = render_passes_.pop_last();
render_passes_.remove_old(current_timeline, [&](VkRenderPass vk_render_pass) {
vkDestroyRenderPass(device.vk_handle(), vk_render_pass, nullptr);
});
}
/* Return the discard pool for the current thread: the active context's pool when a
 * context is bound, otherwise the device-level orphaned-data pool. */
VKDiscardPool &VKDiscardPool::discard_pool_get()
{
VKContext *context = VKContext::get();
if (context != nullptr) {
return context->discard_pool;
}
/* NOTE(review): stray diff lines — the command-buffer loop and `clear()` below belong
 * to `destroy_discarded_resources` (they use the `command_buffers_` member and a
 * `device` variable that is only declared afterwards) and are not part of this
 * static function. */
for (const Map<VkCommandPool, Vector<VkCommandBuffer>>::Item &item : command_buffers_.items()) {
vkFreeCommandBuffers(device.vk_handle(), item.key, item.value.size(), item.value.begin());
}
command_buffers_.clear();
VKDevice &device = VKBackend::get().device;
return device.orphaned_data;
}
} // namespace blender::gpu

View File

@@ -15,6 +15,54 @@
namespace blender::gpu {
class VKDevice;
class VKDiscardPool;
/**
 * Vector of items, each tagged with the timeline (submission) value during which the
 * item was last in use. Inherits privately from `Vector` so only the timeline-aware
 * API is exposed; `VKDiscardPool` is a friend so it can move the raw storage between
 * pools (see `VKDiscardPool::move_data`).
 */
template<typename Item> class TimelineResources : Vector<std::pair<TimelineValue, Item>> {
friend class VKDiscardPool;
public:
/* Append `item` tagged with `timeline`. Timelines must be non-decreasing across
 * appends so the vector stays sorted and `remove_old` can stop at the first entry
 * that is still too new. */
void append_timeline(TimelineValue timeline, Item item)
{
BLI_assert_msg(this->is_empty() || this->last().first <= timeline,
"Timeline must be added in order");
this->append(std::pair(timeline, item));
}
/* Retag every stored item with `timeline`. Called from `VKDiscardPool::move_data`
 * before the items are transferred to another pool, so they all receive the new
 * pool's timeline. Setting all tags to one value trivially preserves ordering. */
void update_timeline(TimelineValue timeline)
{
for (std::pair<TimelineValue, Item> &pair : *this) {
pair.first = timeline;
}
}
/* Re-expose the privately inherited `Vector::size` via an explicit upcast. */
int64_t size() const
{
return static_cast<const Vector<std::pair<TimelineValue, Item>> &>(*this).size();
}
/* Re-expose the privately inherited `Vector::is_empty` via an explicit upcast. */
bool is_empty() const
{
return static_cast<const Vector<std::pair<TimelineValue, Item>> &>(*this).is_empty();
}
/**
 * Remove all items that are used in a timeline before or equal to the current_timeline.
 *
 * `deleter` is invoked once per removed item, in insertion order; remaining items
 * keep their relative order. Relies on the sorted-by-timeline invariant upheld by
 * `append_timeline` to stop scanning at the first too-new entry.
 */
template<typename Deleter> void remove_old(TimelineValue current_timeline, Deleter deleter)
{
int64_t first_index_to_keep = 0;
for (std::pair<TimelineValue, Item> &item : *this) {
if (item.first > current_timeline) {
break;
}
deleter(item.second);
first_index_to_keep++;
}
if (first_index_to_keep > 0) {
this->remove(0, first_index_to_keep);
}
}
};
/**
 * Pool of resources that have been discarded, but may still be in use and therefore cannot be destroyed yet.
@@ -30,31 +78,22 @@ class VKDiscardPool {
friend class VKDevice;
private:
Vector<std::pair<VkImage, VmaAllocation>> images_;
Vector<std::pair<VkBuffer, VmaAllocation>> buffers_;
Vector<VkImageView> image_views_;
Vector<VkShaderModule> shader_modules_;
Vector<VkPipelineLayout> pipeline_layouts_;
Vector<VkRenderPass> render_passes_;
Vector<VkFramebuffer> framebuffers_;
Map<VkCommandPool, Vector<VkCommandBuffer>> command_buffers_;
TimelineResources<std::pair<VkImage, VmaAllocation>> images_;
TimelineResources<std::pair<VkBuffer, VmaAllocation>> buffers_;
TimelineResources<VkImageView> image_views_;
TimelineResources<VkShaderModule> shader_modules_;
TimelineResources<VkPipelineLayout> pipeline_layouts_;
TimelineResources<VkRenderPass> render_passes_;
TimelineResources<VkFramebuffer> framebuffers_;
std::mutex mutex_;
/**
* Free command buffers generated from `vk_command_pool`.
*
* Command buffers are freed in `destroy_discarded_resources`, however if a `vk_command_pool` is
* going to be destroyed, commands buffers generated from this command pool needs to be freed at
* forehand.
*/
void free_command_pool_buffers(VkCommandPool vk_command_pool, VKDevice &device);
TimelineValue timeline_ = UINT64_MAX;
public:
void deinit(VKDevice &device);
void discard_image(VkImage vk_image, VmaAllocation vma_allocation);
void discard_command_buffer(VkCommandBuffer vk_command_buffer, VkCommandPool vk_command_pool);
void discard_image_view(VkImageView vk_image_view);
void discard_buffer(VkBuffer vk_buffer, VmaAllocation vma_allocation);
void discard_shader_module(VkShaderModule vk_shader_module);
@@ -68,9 +107,19 @@ class VKDiscardPool {
* GPU resources that are discarded from the dependency graph are stored in the device orphaned
* data. When a swap chain context list is made active the orphaned data can be merged into a
* swap chain discard pool.
*
* All moved items will receive a new timeline.
*/
void move_data(VKDiscardPool &src_pool);
void destroy_discarded_resources(VKDevice &device);
void move_data(VKDiscardPool &src_pool, TimelineValue timeline);
void destroy_discarded_resources(VKDevice &device, bool force = false);
/**
* Returns the discard pool for the current thread.
*
* When active thread has a context it uses the context discard pool.
* Otherwise the device discard pool is used.
*/
static VKDiscardPool &discard_pool_get();
};
class VKResourcePool {
@@ -78,7 +127,6 @@ class VKResourcePool {
public:
VKDescriptorPools descriptor_pools;
VKDescriptorSetTracker descriptor_set;
VKDiscardPool discard_pool;
VKImmediate immediate;
void init(VKDevice &device);

View File

@@ -12,7 +12,7 @@
namespace blender::gpu {
bool VKSubmissionTracker::is_changed(const VKContext &context)
{
const VKSubmissionID &current_id = context.render_graph.submission_id;
const VKSubmissionID &current_id = context.render_graph().submission_id;
if (last_known_id_ != current_id) {
last_known_id_ = current_id;
return true;

View File

@@ -519,8 +519,7 @@ void VKShader::init(const shader::ShaderCreateInfo &info, bool is_batch_compilat
VKShader::~VKShader()
{
VKDevice &device = VKBackend::get().device;
VKDiscardPool &discard_pool = device.discard_pool_for_current_thread();
VKDiscardPool &discard_pool = VKDiscardPool::discard_pool_get();
if (vk_pipeline_layout != VK_NULL_HANDLE) {
discard_pool.discard_pipeline_layout(vk_pipeline_layout);

View File

@@ -16,8 +16,7 @@
namespace blender::gpu {
VKShaderModule::~VKShaderModule()
{
VKDevice &device = VKBackend::get().device;
VKDiscardPool &discard_pool = device.discard_pool_for_current_thread();
VKDiscardPool &discard_pool = VKDiscardPool::discard_pool_get();
if (vk_shader_module != VK_NULL_HANDLE) {
discard_pool.discard_shader_module(vk_shader_module);
vk_shader_module = VK_NULL_HANDLE;

View File

@@ -40,7 +40,7 @@ void VKStagingBuffer::copy_to_device(VKContext &context)
copy_buffer.dst_buffer = device_buffer_.vk_handle();
copy_buffer.region.size = device_buffer_.size_in_bytes();
context.render_graph.add_node(copy_buffer);
context.render_graph().add_node(copy_buffer);
}
void VKStagingBuffer::copy_from_device(VKContext &context)
@@ -51,7 +51,7 @@ void VKStagingBuffer::copy_from_device(VKContext &context)
copy_buffer.dst_buffer = host_buffer_.vk_handle();
copy_buffer.region.size = device_buffer_.size_in_bytes();
context.render_graph.add_node(copy_buffer);
context.render_graph().add_node(copy_buffer);
}
void VKStagingBuffer::free()

View File

@@ -88,7 +88,7 @@ void VKStorageBuffer::copy_sub(VertBuf *src, uint dst_offset, uint src_offset, u
copy_buffer.region.size = copy_size;
VKContext &context = *VKContext::get();
context.render_graph.add_node(copy_buffer);
context.render_graph().add_node(copy_buffer);
}
void VKStorageBuffer::async_flush_to_host()

View File

@@ -41,8 +41,7 @@ static VkImageAspectFlags to_vk_image_aspect_single_bit(const VkImageAspectFlags
VKTexture::~VKTexture()
{
if (vk_image_ != VK_NULL_HANDLE && allocation_ != VK_NULL_HANDLE) {
VKDevice &device = VKBackend::get().device;
device.discard_pool_for_current_thread().discard_image(vk_image_, allocation_);
VKDiscardPool::discard_pool_get().discard_image(vk_image_, allocation_);
vk_image_ = VK_NULL_HANDLE;
allocation_ = VK_NULL_HANDLE;
}
@@ -83,7 +82,7 @@ void VKTexture::generate_mipmap()
update_mipmaps.vk_image_aspect = to_vk_image_aspect_flag_bits(device_format_);
update_mipmaps.mipmaps = mipmaps_;
update_mipmaps.layer_count = vk_layer_count(1);
context.render_graph.add_node(update_mipmaps);
context.render_graph().add_node(update_mipmaps);
}
void VKTexture::copy_to(VKTexture &dst_texture, VkImageAspectFlags vk_image_aspect)
@@ -101,7 +100,7 @@ void VKTexture::copy_to(VKTexture &dst_texture, VkImageAspectFlags vk_image_aspe
copy_image.vk_image_aspect = to_vk_image_aspect_flag_bits(device_format_get());
VKContext &context = *VKContext::get();
context.render_graph.add_node(copy_image);
context.render_graph().add_node(copy_image);
}
void VKTexture::copy_to(Texture *tex)
@@ -142,7 +141,7 @@ void VKTexture::clear(eGPUDataFormat format, const void *data)
VKContext &context = *VKContext::get();
context.render_graph.add_node(clear_color_image);
context.render_graph().add_node(clear_color_image);
}
void VKTexture::clear_depth_stencil(const eGPUFrameBufferBits buffers,
@@ -171,7 +170,7 @@ void VKTexture::clear_depth_stencil(const eGPUFrameBufferBits buffers,
VK_REMAINING_MIP_LEVELS;
VKContext &context = *VKContext::get();
context.render_graph.add_node(clear_depth_stencil_image);
context.render_graph().add_node(clear_depth_stencil_image);
}
void VKTexture::swizzle_set(const char swizzle_mask[4])
@@ -220,9 +219,12 @@ void VKTexture::read_sub(
VKContext &context = *VKContext::get();
context.rendering_end();
context.render_graph.add_node(copy_image_to_buffer);
context.render_graph().add_node(copy_image_to_buffer);
context.descriptor_set_get().upload_descriptor_sets();
context.render_graph.submit_for_read();
context.flush_render_graph(RenderGraphFlushFlags::SUBMIT |
RenderGraphFlushFlags::RENEW_RENDER_GRAPH |
RenderGraphFlushFlags::WAIT_FOR_COMPLETION);
convert_device_to_host(
r_data, staging_buffer.mapped_memory_get(), sample_len, format, format_, device_format_);
@@ -363,7 +365,7 @@ void VKTexture::update_sub(int mip,
node_data.region.imageSubresource.baseArrayLayer = start_layer;
node_data.region.imageSubresource.layerCount = layers;
context.render_graph.add_node(copy_buffer_to_image);
context.render_graph().add_node(copy_buffer_to_image);
}
void VKTexture::update_sub(