Merge branch 'blender-v4.3-release'

This commit is contained in:
Jeroen Bakker
2024-10-14 15:45:34 +02:00
20 changed files with 200 additions and 108 deletions

View File

@@ -1277,13 +1277,17 @@ int GHOST_XrGetControllerModelData(GHOST_XrContextHandle xr_context,
* \param r_queue: After calling this function the VkQueue
* referenced by this parameter will contain the VkQueue handle
* of the context associated with the `context` parameter.
* \param r_queue_mutex: After calling this function the std::mutex referred
* to by this parameter will contain the mutex of the context associated
* with the `context` parameter.
*/
void GHOST_GetVulkanHandles(GHOST_ContextHandle context,
void *r_instance,
void *r_physical_device,
void *r_device,
uint32_t *r_graphic_queue_family,
void *r_queue);
void *r_queue,
void **r_queue_mutex);
/**
* Set the pre and post callbacks for vulkan swap chain in the given context.

View File

@@ -68,12 +68,16 @@ class GHOST_IContext {
* \param r_queue: After calling this function the VkQueue
* referenced by this parameter will contain the VkQueue handle
* of the context associated with the `context` parameter.
* \param r_queue_mutex: After calling this function the std::mutex referred
* to by this parameter will contain the mutex of the context associated
* with the `context` parameter.
*/
virtual GHOST_TSuccess getVulkanHandles(void *r_instance,
void *r_physical_device,
void *r_device,
uint32_t *r_graphic_queue_family,
void *r_queue) = 0;
void *r_queue,
void **r_queue_mutex) = 0;
/**
* Acquire the current swap chain format.

View File

@@ -1256,11 +1256,12 @@ void GHOST_GetVulkanHandles(GHOST_ContextHandle contexthandle,
void *r_physical_device,
void *r_device,
uint32_t *r_graphic_queue_family,
void *r_queue)
void *r_queue,
void **r_queue_mutex)
{
GHOST_IContext *context = (GHOST_IContext *)contexthandle;
context->getVulkanHandles(
r_instance, r_physical_device, r_device, r_graphic_queue_family, r_queue);
r_instance, r_physical_device, r_device, r_graphic_queue_family, r_queue, r_queue_mutex);
}
void GHOST_SetVulkanSwapBuffersCallbacks(

View File

@@ -154,6 +154,9 @@ class GHOST_Context : public GHOST_IContext {
* \param r_queue: After calling this function the VkQueue
* referenced by this parameter will contain the VkQueue handle
* of the context associated with the `context` parameter.
* \param r_queue_mutex: After calling this function the std::mutex referred
* to by this parameter will contain the mutex of the context associated
* with the `context` parameter.
* \returns GHOST_kFailure when context isn't a Vulkan context.
* GHOST_kSuccess when the context is a Vulkan context and the
* handles have been set.
@@ -162,7 +165,8 @@ class GHOST_Context : public GHOST_IContext {
void * /*r_physical_device*/,
void * /*r_device*/,
uint32_t * /*r_graphic_queue_family*/,
void * /*r_queue*/) override
void * /*r_queue*/,
void ** /*r_queue_mutex*/) override
{
return GHOST_kFailure;
};

View File

@@ -27,6 +27,7 @@
#include <cstdio>
#include <cstring>
#include <iostream>
#include <mutex>
#include <optional>
#include <sstream>
@@ -136,6 +137,9 @@ class GHOST_DeviceVK {
int users = 0;
/** Mutex to externally synchronize access to queue. */
std::mutex queue_mutex;
public:
GHOST_DeviceVK(VkInstance vk_instance, VkPhysicalDevice vk_physical_device)
: instance(vk_instance), physical_device(vk_physical_device)
@@ -420,7 +424,7 @@ static GHOST_TSuccess ensure_vulkan_device(VkInstance vk_instance,
return GHOST_kFailure;
}
vulkan_device = std::make_optional<GHOST_DeviceVK>(vk_instance, best_physical_device);
vulkan_device.emplace(vk_instance, best_physical_device);
return GHOST_kSuccess;
}
@@ -564,7 +568,11 @@ GHOST_TSuccess GHOST_ContextVK::swapBuffers()
present_info.pImageIndices = &s_currentImage;
present_info.pResults = nullptr;
VkResult result = vkQueuePresentKHR(m_present_queue, &present_info);
VkResult result = VK_SUCCESS;
{
std::scoped_lock lock(vulkan_device->queue_mutex);
result = vkQueuePresentKHR(m_present_queue, &present_info);
}
if (result == VK_ERROR_OUT_OF_DATE_KHR || result == VK_SUBOPTIMAL_KHR) {
/* Swap-chain is out of date. Recreate swap-chain and skip this frame. */
destroySwapchain();
@@ -608,7 +616,8 @@ GHOST_TSuccess GHOST_ContextVK::getVulkanHandles(void *r_instance,
void *r_physical_device,
void *r_device,
uint32_t *r_graphic_queue_family,
void *r_queue)
void *r_queue,
void **r_queue_mutex)
{
*((VkInstance *)r_instance) = VK_NULL_HANDLE;
*((VkPhysicalDevice *)r_physical_device) = VK_NULL_HANDLE;
@@ -619,6 +628,8 @@ GHOST_TSuccess GHOST_ContextVK::getVulkanHandles(void *r_instance,
*((VkPhysicalDevice *)r_physical_device) = vulkan_device->physical_device;
*((VkDevice *)r_device) = vulkan_device->device;
*r_graphic_queue_family = vulkan_device->generic_queue_family;
std::mutex **queue_mutex = (std::mutex **)r_queue_mutex;
*queue_mutex = &vulkan_device->queue_mutex;
}
*((VkQueue *)r_queue) = m_graphic_queue;

View File

@@ -127,7 +127,8 @@ class GHOST_ContextVK : public GHOST_Context {
void *r_physical_device,
void *r_device,
uint32_t *r_graphic_queue_family,
void *r_queue) override;
void *r_queue,
void **r_queue_mutex) override;
GHOST_TSuccess getVulkanSwapChainFormat(GHOST_VulkanSwapChainData *r_swap_chain_data) override;

View File

@@ -45,13 +45,13 @@ class CommandBufferLog : public VKCommandBufferInterface {
is_recording_ = false;
}
void submit_with_cpu_synchronization() override
void submit_with_cpu_synchronization(VkFence /*vk_fence*/) override
{
EXPECT_FALSE(is_recording_);
EXPECT_FALSE(is_cpu_synchronizing_);
is_cpu_synchronizing_ = true;
};
void wait_for_cpu_synchronization() override
void wait_for_cpu_synchronization(VkFence /*vk_fence*/) override
{
EXPECT_FALSE(is_recording_);
EXPECT_TRUE(is_cpu_synchronizing_);

View File

@@ -15,7 +15,6 @@ VKCommandBufferWrapper::VKCommandBufferWrapper()
{
vk_command_pool_create_info_ = {};
vk_command_pool_create_info_.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
vk_command_pool_create_info_.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
vk_command_pool_create_info_.queueFamilyIndex = 0;
vk_command_buffer_allocate_info_ = {};
@@ -26,6 +25,7 @@ VKCommandBufferWrapper::VKCommandBufferWrapper()
vk_command_buffer_begin_info_ = {};
vk_command_buffer_begin_info_.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
vk_command_buffer_begin_info_.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
vk_fence_create_info_ = {};
vk_fence_create_info_.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
@@ -73,14 +73,9 @@ void VKCommandBufferWrapper::begin_recording()
if (vk_fence_ == VK_NULL_HANDLE) {
vkCreateFence(device.vk_handle(), &vk_fence_create_info_, vk_allocation_callbacks, &vk_fence_);
}
if (vk_command_buffer_ == VK_NULL_HANDLE) {
vkAllocateCommandBuffers(
device.vk_handle(), &vk_command_buffer_allocate_info_, &vk_command_buffer_);
}
else {
vkResetCommandBuffer(vk_command_buffer_, 0);
}
BLI_assert(vk_command_buffer_ == VK_NULL_HANDLE);
vkAllocateCommandBuffers(
device.vk_handle(), &vk_command_buffer_allocate_info_, &vk_command_buffer_);
vkBeginCommandBuffer(vk_command_buffer_, &vk_command_buffer_begin_info_);
}
@@ -90,17 +85,27 @@ void VKCommandBufferWrapper::end_recording()
vkEndCommandBuffer(vk_command_buffer_);
}
void VKCommandBufferWrapper::submit_with_cpu_synchronization()
void VKCommandBufferWrapper::submit_with_cpu_synchronization(VkFence vk_fence)
{
if (vk_fence == VK_NULL_HANDLE) {
vk_fence = vk_fence_;
}
VKDevice &device = VKBackend::get().device;
vkResetFences(device.vk_handle(), 1, &vk_fence_);
vkQueueSubmit(device.queue_get(), 1, &vk_submit_info_, vk_fence_);
vkResetFences(device.vk_handle(), 1, &vk_fence);
{
std::scoped_lock lock(device.queue_mutex_get());
vkQueueSubmit(device.queue_get(), 1, &vk_submit_info_, vk_fence);
}
vk_command_buffer_ = nullptr;
}
void VKCommandBufferWrapper::wait_for_cpu_synchronization()
void VKCommandBufferWrapper::wait_for_cpu_synchronization(VkFence vk_fence)
{
if (vk_fence == VK_NULL_HANDLE) {
vk_fence = vk_fence_;
}
VKDevice &device = VKBackend::get().device;
while (vkWaitForFences(device.vk_handle(), 1, &vk_fence_, true, UINT64_MAX) == VK_TIMEOUT) {
while (vkWaitForFences(device.vk_handle(), 1, &vk_fence, true, UINT64_MAX) == VK_TIMEOUT) {
}
}

View File

@@ -18,8 +18,8 @@ class VKCommandBufferInterface {
virtual void begin_recording() = 0;
virtual void end_recording() = 0;
virtual void submit_with_cpu_synchronization() = 0;
virtual void wait_for_cpu_synchronization() = 0;
virtual void submit_with_cpu_synchronization(VkFence vk_fence = VK_NULL_HANDLE) = 0;
virtual void wait_for_cpu_synchronization(VkFence vk_fence = VK_NULL_HANDLE) = 0;
virtual void bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) = 0;
virtual void bind_descriptor_sets(VkPipelineBindPoint pipeline_bind_point,
@@ -152,8 +152,8 @@ class VKCommandBufferWrapper : public VKCommandBufferInterface {
void begin_recording() override;
void end_recording() override;
void submit_with_cpu_synchronization() override;
void wait_for_cpu_synchronization() override;
void submit_with_cpu_synchronization(VkFence vk_fence) override;
void wait_for_cpu_synchronization(VkFence vk_fence) override;
void bind_pipeline(VkPipelineBindPoint pipeline_bind_point, VkPipeline pipeline) override;
void bind_descriptor_sets(VkPipelineBindPoint pipeline_bind_point,

View File

@@ -75,14 +75,25 @@ void VKRenderGraph::submit_buffer_for_read(VkBuffer vk_buffer)
}
void VKRenderGraph::submit()
{
/* Using `VK_NULL_HANDLE` will select the default VkFence of the command buffer. */
submit_synchronization_event(VK_NULL_HANDLE);
wait_synchronization_event(VK_NULL_HANDLE);
}
void VKRenderGraph::submit_synchronization_event(VkFence vk_fence)
{
std::scoped_lock lock(resources_.mutex);
Span<NodeHandle> node_handles = scheduler_.select_nodes(*this);
command_builder_.build_nodes(*this, *command_buffer_, node_handles);
command_buffer_->submit_with_cpu_synchronization();
command_buffer_->submit_with_cpu_synchronization(vk_fence);
submission_id.next();
remove_nodes(node_handles);
command_buffer_->wait_for_cpu_synchronization();
}
void VKRenderGraph::wait_synchronization_event(VkFence vk_fence)
{
command_buffer_->wait_for_cpu_synchronization(vk_fence);
}
/** \} */

View File

@@ -230,6 +230,10 @@ class VKRenderGraph : public NonCopyable {
*/
void submit();
/** Submit render graph with CPU synchronization event. */
void submit_synchronization_event(VkFence vk_fence);
/** Wait and reset for a CPU synchronization event. */
void wait_synchronization_event(VkFence vk_fence);
/**
* Push a new debugging group to the stack with the given name.
*

View File

@@ -106,6 +106,17 @@ static Vector<StringRefNull> missing_capabilities_get(VkPhysicalDevice vk_physic
if (!extensions.contains(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME)) {
missing_capabilities.append(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
}
/* VK_EXT_dynamic_rendering_unused_attachments is required for correct operation. RenderDoc hides
* this extension, but most platforms still work without it. However, the Windows Intel driver
* crashes when using iGPUs; they don't support this extension at all.
*
* TODO(jbakker): Make dynamic rendering optional to allow running on Windows/Intel iGPU.
*/
if (!bool(G.debug & G_DEBUG_GPU_RENDERDOC)) {
if (!extensions.contains(VK_EXT_DYNAMIC_RENDERING_UNUSED_ATTACHMENTS_EXTENSION_NAME)) {
missing_capabilities.append(VK_EXT_DYNAMIC_RENDERING_UNUSED_ATTACHMENTS_EXTENSION_NAME);
}
}
return missing_capabilities;
}
@@ -375,7 +386,7 @@ Context *VKBackend::context_alloc(void *ghost_window, void *ghost_context)
device.init(ghost_context);
}
VKContext *context = new VKContext(ghost_window, ghost_context, device.current_thread_data());
VKContext *context = new VKContext(ghost_window, ghost_context, device.resources);
device.context_register(*context);
GHOST_SetVulkanSwapBuffersCallbacks((GHOST_ContextHandle)ghost_context,
VKContext::swap_buffers_pre_callback,
@@ -455,20 +466,25 @@ void VKBackend::render_end()
VKThreadData &thread_data = device.current_thread_data();
thread_data.rendering_depth -= 1;
BLI_assert_msg(thread_data.rendering_depth >= 0, "Unbalanced `GPU_render_begin/end`");
if (G.background || !BLI_thread_is_main()) {
/* When **not** running on the main thread (or doing background rendering) we assume that there
* is no swap chain in play. Rendering happens on a single thread and when finished all the
* resources have been used and are in a state that they can be discarded. It can still be that
* a non-main thread discards a resource that is in use by another thread. We move discarded
* resources to a device global discard pool (`device.orphaned_data`). The next time the main
* thread goes to the next swap chain image the device global discard pool will be added to the
* discard pool of the new swap chain image. */
if (G.background) {
/* Garbage collection when performing background rendering. In this case the rendering is
* already 'thread-safe'. We move the resources to the device discard list and we destroy it
* the next frame. */
if (thread_data.rendering_depth == 0) {
VKResourcePool &resource_pool = thread_data.resource_pool_get();
resource_pool.discard_pool.destroy_discarded_resources(device);
device.orphaned_data.destroy_discarded_resources(device);
device.orphaned_data.move_data(resource_pool.discard_pool);
resource_pool.reset();
}
}
else if (!BLI_thread_is_main()) {
/* Foreground rendering using a worker/render thread. In this case we move the resources to the
* device discard list and it will be cleared by the main thread. */
if (thread_data.rendering_depth == 0) {
VKResourcePool &resource_pool = thread_data.resource_pool_get();
device.orphaned_data.move_data(resource_pool.discard_pool);
resource_pool.reset();
resource_pool.discard_pool.move_data(device.orphaned_data);
}
}
}

View File

@@ -40,10 +40,14 @@ static VmaAllocationCreateFlags vma_allocation_flags(GPUUsageType usage)
return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT;
}
static VkMemoryPropertyFlags vma_preferred_flags(const bool is_host_visible)
static VkMemoryPropertyFlags vma_preferred_flags()
{
return is_host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT :
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
}
static VkMemoryPropertyFlags vma_required_flags(const bool is_host_visible)
{
return is_host_visible ? VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT : 0;
}
/*
@@ -83,7 +87,8 @@ bool VKBuffer::create(size_t size_in_bytes,
VmaAllocationCreateInfo vma_create_info = {};
vma_create_info.flags = vma_allocation_flags(usage);
vma_create_info.priority = 1.0f;
vma_create_info.preferredFlags = vma_preferred_flags(is_host_visible);
vma_create_info.requiredFlags = vma_required_flags(is_host_visible);
vma_create_info.preferredFlags = vma_preferred_flags();
vma_create_info.usage = VMA_MEMORY_USAGE_AUTO;
VkResult result = vmaCreateBuffer(

View File

@@ -21,14 +21,15 @@
namespace blender::gpu {
VKContext::VKContext(void *ghost_window, void *ghost_context, VKThreadData &thread_data)
: thread_data_(thread_data), render_graph(thread_data_.render_graph)
VKContext::VKContext(void *ghost_window,
void *ghost_context,
render_graph::VKResourceStateTracker &resources)
: render_graph(std::make_unique<render_graph::VKCommandBufferWrapper>(), resources)
{
ghost_window_ = ghost_window;
ghost_context_ = ghost_context;
state_manager = new VKStateManager();
imm = &thread_data.resource_pool_get().immediate;
back_left = new VKFrameBuffer("back_left");
front_left = new VKFrameBuffer("front_left");
@@ -57,9 +58,9 @@ void VKContext::sync_backbuffer()
if (ghost_window_) {
GHOST_VulkanSwapChainData swap_chain_data = {};
GHOST_GetVulkanSwapChainFormat((GHOST_WindowHandle)ghost_window_, &swap_chain_data);
if (assign_if_different(thread_data_.resource_pool_index, swap_chain_data.swap_chain_index)) {
thread_data_.resource_pool_index = swap_chain_data.swap_chain_index;
VKResourcePool &resource_pool = thread_data_.resource_pool_get();
VKThreadData &thread_data = thread_data_.value().get();
if (assign_if_different(thread_data.resource_pool_index, swap_chain_data.swap_chain_index)) {
VKResourcePool &resource_pool = thread_data.resource_pool_get();
imm = &resource_pool.immediate;
resource_pool.discard_pool.destroy_discarded_resources(device);
resource_pool.reset();
@@ -108,6 +109,12 @@ void VKContext::activate()
/* Make sure no other context is already bound to this thread. */
BLI_assert(is_active_ == false);
VKDevice &device = VKBackend::get().device;
VKThreadData &thread_data = device.current_thread_data();
thread_data_ = std::reference_wrapper<VKThreadData>(thread_data);
imm = &thread_data.resource_pool_get().immediate;
is_active_ = true;
sync_backbuffer();
@@ -117,21 +124,16 @@ void VKContext::activate()
void VKContext::deactivate()
{
rendering_end();
flush_render_graph();
immDeactivate();
imm = nullptr;
thread_data_.reset();
is_active_ = false;
}
void VKContext::begin_frame() {}
void VKContext::end_frame()
{
/* Enable this to track how resources are managed per thread and resource pool. */
#if 0
VKDevice &device = VKBackend::get().device;
device.debug_print();
#endif
}
void VKContext::end_frame() {}
void VKContext::flush() {}
@@ -161,12 +163,12 @@ void VKContext::memory_statistics_get(int *r_total_mem_kb, int *r_free_mem_kb)
VKDescriptorPools &VKContext::descriptor_pools_get()
{
return thread_data_.resource_pool_get().descriptor_pools;
return thread_data_.value().get().resource_pool_get().descriptor_pools;
}
VKDescriptorSetTracker &VKContext::descriptor_set_get()
{
return thread_data_.resource_pool_get().descriptor_set;
return thread_data_.value().get().resource_pool_get().descriptor_set;
}
VKStateManager &VKContext::state_manager_get() const

View File

@@ -36,12 +36,14 @@ class VKContext : public Context, NonCopyable {
/* Reusable data. Stored inside context to limit reallocations. */
render_graph::VKResourceAccessInfo access_info_ = {};
VKThreadData &thread_data_;
std::optional<std::reference_wrapper<VKThreadData>> thread_data_;
public:
render_graph::VKRenderGraph &render_graph;
render_graph::VKRenderGraph render_graph;
VKContext(void *ghost_window, void *ghost_context, VKThreadData &thread_data);
VKContext(void *ghost_window,
void *ghost_context,
render_graph::VKResourceStateTracker &resources);
virtual ~VKContext();
void activate() override;

View File

@@ -75,12 +75,15 @@ bool VKDevice::is_initialized() const
void VKDevice::init(void *ghost_context)
{
BLI_assert(!is_initialized());
void *queue_mutex = nullptr;
GHOST_GetVulkanHandles((GHOST_ContextHandle)ghost_context,
&vk_instance_,
&vk_physical_device_,
&vk_device_,
&vk_queue_family_,
&vk_queue_);
&vk_queue_,
&queue_mutex);
queue_mutex_ = static_cast<std::mutex *>(queue_mutex);
init_physical_device_properties();
init_physical_device_memory_properties();
@@ -327,11 +330,7 @@ std::string VKDevice::driver_version() const
/** \name VKThreadData
* \{ */
VKThreadData::VKThreadData(VKDevice &device,
pthread_t thread_id,
std::unique_ptr<render_graph::VKCommandBufferInterface> command_buffer,
render_graph::VKResourceStateTracker &resources)
: thread_id(thread_id), render_graph(std::move(command_buffer), resources)
VKThreadData::VKThreadData(VKDevice &device, pthread_t thread_id) : thread_id(thread_id)
{
for (VKResourcePool &resource_pool : resource_pools) {
resource_pool.init(device);
@@ -362,11 +361,7 @@ VKThreadData &VKDevice::current_thread_data()
}
}
VKThreadData *thread_data = new VKThreadData(
*this,
current_thread_id,
std::make_unique<render_graph::VKCommandBufferWrapper>(),
resources);
VKThreadData *thread_data = new VKThreadData(*this, current_thread_id);
thread_data_.append(thread_data);
return *thread_data;
}
@@ -375,9 +370,11 @@ VKDiscardPool &VKDevice::discard_pool_for_current_thread()
{
std::scoped_lock mutex(resources.mutex);
pthread_t current_thread_id = pthread_self();
for (VKThreadData *thread_data : thread_data_) {
if (pthread_equal(thread_data->thread_id, current_thread_id)) {
return thread_data->resource_pool_get().discard_pool;
if (BLI_thread_is_main()) {
for (VKThreadData *thread_data : thread_data_) {
if (pthread_equal(thread_data->thread_id, current_thread_id)) {
return thread_data->resource_pool_get().discard_pool;
}
}
}
@@ -387,19 +384,11 @@ VKDiscardPool &VKDevice::discard_pool_for_current_thread()
void VKDevice::context_register(VKContext &context)
{
contexts_.append(std::reference_wrapper(context));
current_thread_data().num_contexts += 1;
}
void VKDevice::context_unregister(VKContext &context)
{
contexts_.remove(contexts_.first_index_of(std::reference_wrapper(context)));
auto &thread_data = current_thread_data();
thread_data.num_contexts -= 1;
BLI_assert(thread_data.num_contexts >= 0);
if (thread_data.num_contexts == 0) {
discard_pool_for_current_thread().destroy_discarded_resources(*this);
}
}
Span<std::reference_wrapper<VKContext>> VKDevice::contexts_get() const
{
@@ -482,7 +471,6 @@ void VKDevice::debug_print()
const bool is_main = pthread_equal(thread_data->thread_id, pthread_self());
os << "ThreadData" << (is_main ? " (main-thread)" : "") << ")\n";
os << " Rendering_depth: " << thread_data->rendering_depth << "\n";
os << " Number of contexts: " << thread_data->num_contexts << "\n";
for (int resource_pool_index : IndexRange(thread_data->resource_pools.size())) {
const VKResourcePool &resource_pool = thread_data->resource_pools[resource_pool_index];
const bool is_active = thread_data->resource_pool_index == resource_pool_index;

View File

@@ -71,7 +71,6 @@ class VKThreadData : public NonCopyable, NonMovable {
public:
/** Thread ID this instance belongs to. */
pthread_t thread_id;
render_graph::VKRenderGraph render_graph;
/**
* Index of the active resource pool. Is in sync with the active swap chain image or cycled when
* rendering.
@@ -91,16 +90,7 @@ class VKThreadData : public NonCopyable, NonMovable {
*/
int32_t rendering_depth = 0;
/**
* Number of contexts registered in the current thread.
* Discarded resources are destroyed when all contexts are unregistered.
*/
int32_t num_contexts = 0;
VKThreadData(VKDevice &device,
pthread_t thread_id,
std::unique_ptr<render_graph::VKCommandBufferInterface> command_buffer,
render_graph::VKResourceStateTracker &resources);
VKThreadData(VKDevice &device, pthread_t thread_id);
void deinit(VKDevice &device);
/**
@@ -134,6 +124,7 @@ class VKDevice : public NonCopyable {
VkDevice vk_device_ = VK_NULL_HANDLE;
uint32_t vk_queue_family_ = 0;
VkQueue vk_queue_ = VK_NULL_HANDLE;
std::mutex *queue_mutex_ = nullptr;
VKSamplers samplers_;
VKDescriptorSetLayouts descriptor_set_layouts_;
@@ -233,6 +224,10 @@ class VKDevice : public NonCopyable {
{
return vk_queue_;
}
std::mutex &queue_mutex_get()
{
return *queue_mutex_;
}
const uint32_t queue_family_get() const
{

View File

@@ -7,18 +7,48 @@
*/
#include "vk_fence.hh"
#include "vk_backend.hh"
#include "vk_common.hh"
#include "vk_context.hh"
#include "vk_memory.hh"
namespace blender::gpu {
VKFence::~VKFence()
{
if (vk_fence_ != VK_NULL_HANDLE) {
VK_ALLOCATION_CALLBACKS
VKDevice &device = VKBackend::get().device;
vkDestroyFence(device.vk_handle(), vk_fence_, vk_allocation_callbacks);
vk_fence_ = VK_NULL_HANDLE;
}
}
void VKFence::signal()
{
NOT_YET_IMPLEMENTED
if (vk_fence_ == VK_NULL_HANDLE) {
VK_ALLOCATION_CALLBACKS
VKDevice &device = VKBackend::get().device;
VkFenceCreateInfo vk_fence_create_info = {};
vk_fence_create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
vk_fence_create_info.flags = VK_FENCE_CREATE_SIGNALED_BIT;
vkCreateFence(device.vk_handle(), &vk_fence_create_info, vk_allocation_callbacks, &vk_fence_);
}
VKContext &context = *VKContext::get();
context.rendering_end();
context.descriptor_set_get().upload_descriptor_sets();
context.render_graph.submit_synchronization_event(vk_fence_);
signalled_ = true;
}
void VKFence::wait()
{
NOT_YET_IMPLEMENTED
if (!signalled_) {
return;
}
VKContext &context = *VKContext::get();
context.render_graph.wait_synchronization_event(vk_fence_);
signalled_ = false;
}
} // namespace blender::gpu

View File

@@ -10,9 +10,18 @@
#include "gpu_state_private.hh"
#include "vk_common.hh"
namespace blender::gpu {
class VKFence : public Fence {
private:
VkFence vk_fence_ = VK_NULL_HANDLE;
bool signalled_ = false;
protected:
virtual ~VKFence();
public:
void signal() override;
void wait() override;

View File

@@ -204,13 +204,13 @@ static bke::CurvesGeometry create_dashes(const PatternInfo &pattern_info,
const IndexMask &curves_mask)
{
const bke::AttributeAccessor src_attributes = src_curves.attributes();
const VArray<bool> src_cyclic = *src_attributes.lookup_or_default(
"cyclic", bke::AttrDomain::Curve, false);
const VArray<bool> src_cyclic = src_curves.cyclic();
const VArray<int> src_material = *src_attributes.lookup_or_default(
"material_index", bke::AttrDomain::Curve, 0);
const VArray<float> src_radius = *src_attributes.lookup<float>("radius", bke::AttrDomain::Point);
const VArray<float> src_opacity = *src_attributes.lookup<float>("opacity",
bke::AttrDomain::Point);
const VArray<float> src_radius = *src_attributes.lookup_or_default<float>(
"radius", bke::AttrDomain::Point, 0.01f);
const VArray<float> src_opacity = *src_attributes.lookup_or_default<float>(
"opacity", bke::AttrDomain::Point, 1.0f);
/* Count new curves and points. */
int dst_point_num = 0;