From a4914b8972a4b6840b3ba7fd3292bba934defee5 Mon Sep 17 00:00:00 2001
From: Jeroen Bakker <jeroen@blender.org>
Date: Mon, 13 Jan 2025 09:29:16 +0100
Subject: [PATCH] Vulkan: Disable local read on non-Qualcomm devices

Enable dynamic rendering local read by default only on Qualcomm devices. On NVIDIA, AMD and
Intel devices performance is better (20%) when it is disabled. On Qualcomm devices the
improvement from enabling it can be substantial (16% on shader_balls.blend).

`--debug-gpu-vulkan-local-read` can be used to force dynamic rendering local read on any
platform that supports it.

Future: Check whether the bottleneck is in command building. If so, this could be fine-tuned
after device command building has landed (#T132682).

Pull Request: https://projects.blender.org/blender/blender/pulls/132981
---
 source/blender/blenkernel/BKE_global.hh | 21 +++++++++++----------
 source/blender/gpu/vulkan/vk_backend.cc | 16 ++++++++++++++++
 source/creator/creator_args.cc          | 12 ++++++++++++
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/source/blender/blenkernel/BKE_global.hh b/source/blender/blenkernel/BKE_global.hh
index b8d713e5d3f..9f5d000e86e 100644
--- a/source/blender/blenkernel/BKE_global.hh
+++ b/source/blender/blenkernel/BKE_global.hh
@@ -259,17 +259,18 @@ enum {
                                              * assigned to ID datablocks */
   G_DEBUG_DEPSGRAPH = (G_DEBUG_DEPSGRAPH_BUILD | G_DEBUG_DEPSGRAPH_EVAL | G_DEBUG_DEPSGRAPH_TAG |
                        G_DEBUG_DEPSGRAPH_TIME | G_DEBUG_DEPSGRAPH_UID),
-  G_DEBUG_SIMDATA = (1 << 15),               /* sim debug data display */
-  G_DEBUG_GPU = (1 << 16),                   /* gpu debug */
-  G_DEBUG_IO = (1 << 17),                    /* IO Debugging (for Collada, ...). */
-  G_DEBUG_GPU_FORCE_WORKAROUNDS = (1 << 18), /* Force GPU workarounds bypassing detection. */
-  G_DEBUG_GPU_COMPILE_SHADERS = (1 << 19),   /* Compile all statically defined shaders. . */
-  G_DEBUG_GPU_RENDERDOC = (1 << 20),         /* Enable RenderDoc integration. */
-  G_DEBUG_XR = (1 << 21),                    /* XR/OpenXR messages */
-  G_DEBUG_XR_TIME = (1 << 22),               /* XR/OpenXR timing messages */
+  G_DEBUG_SIMDATA = (1 << 15),                     /* sim debug data display */
+  G_DEBUG_GPU = (1 << 16),                         /* gpu debug */
+  G_DEBUG_IO = (1 << 17),                          /* IO Debugging (for Collada, ...). */
+  G_DEBUG_GPU_FORCE_WORKAROUNDS = (1 << 18),       /* Force GPU workarounds bypassing detection. */
+  G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ = (1 << 19), /* Force GPU dynamic rendering local read. */
+  G_DEBUG_GPU_COMPILE_SHADERS = (1 << 20),         /* Compile all statically defined shaders. */
+  G_DEBUG_GPU_RENDERDOC = (1 << 21),               /* Enable RenderDoc integration. */
+  G_DEBUG_XR = (1 << 22),                          /* XR/OpenXR messages */
+  G_DEBUG_XR_TIME = (1 << 23),                     /* XR/OpenXR timing messages */
 
-  G_DEBUG_GHOST = (1 << 23),  /* Debug GHOST module. */
-  G_DEBUG_WINTAB = (1 << 24), /* Debug Wintab. */
+  G_DEBUG_GHOST = (1 << 24),  /* Debug GHOST module. */
+  G_DEBUG_WINTAB = (1 << 25), /* Debug Wintab. */
 };
 
 #define G_DEBUG_ALL \
diff --git a/source/blender/gpu/vulkan/vk_backend.cc b/source/blender/gpu/vulkan/vk_backend.cc
index 026bab6cdb3..b29960a1b64 100644
--- a/source/blender/gpu/vulkan/vk_backend.cc
+++ b/source/blender/gpu/vulkan/vk_backend.cc
@@ -386,6 +386,22 @@ void VKBackend::detect_workarounds(VKDevice &device)
     workarounds.not_aligned_pixel_formats = true;
   }
 
+  /* Enable dynamic rendering local read by default only on Qualcomm devices. On NVIDIA, AMD and
+   * Intel devices performance is better (20%) when it is disabled. On Qualcomm devices the
+   * improvement from enabling it can be substantial (16% on shader_balls.blend).
+   *
+   * `--debug-gpu-vulkan-local-read` can be used to force dynamic rendering local read on any
+   * platform that supports it.
+   *
+   * TODO: Check whether the bottleneck is in command building. If so, this could be fine-tuned
+   * after device command building has landed (T132682).
+   */
+  if ((G.debug & G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ) == 0 &&
+      !GPU_type_matches(GPU_DEVICE_QUALCOMM, GPU_OS_ANY, GPU_DRIVER_ANY))
+  {
+    workarounds.dynamic_rendering_local_read = true;
+  }
+
   VkFormatProperties format_properties = {};
   vkGetPhysicalDeviceFormatProperties(
       device.physical_device_get(), VK_FORMAT_R8G8B8_UNORM, &format_properties);
diff --git a/source/creator/creator_args.cc b/source/creator/creator_args.cc
index 1052b6b002c..98da177ead9 100644
--- a/source/creator/creator_args.cc
+++ b/source/creator/creator_args.cc
@@ -1347,6 +1347,11 @@ static const char arg_handle_debug_mode_generic_set_doc_depsgraph_uid[] =
 static const char arg_handle_debug_mode_generic_set_doc_gpu_force_workarounds[] =
     "\n\t"
     "Enable workarounds for typical GPU issues and disable all GPU extensions.";
+#  ifdef WITH_VULKAN_BACKEND
+static const char arg_handle_debug_mode_generic_set_doc_gpu_force_vulkan_local_read[] =
+    "\n\t"
+    "Force Vulkan dynamic rendering local read when supported by device.";
+#  endif
 
 static int arg_handle_debug_mode_generic_set(int /*argc*/, const char ** /*argv*/, void *data)
 {
@@ -2853,6 +2858,13 @@ void main_args_setup(bContext *C, bArgs *ba, bool all)
                "--debug-gpu-force-workarounds",
                CB_EX(arg_handle_debug_mode_generic_set, gpu_force_workarounds),
                (void *)G_DEBUG_GPU_FORCE_WORKAROUNDS);
+#  ifdef WITH_VULKAN_BACKEND
+  BLI_args_add(ba,
+               nullptr,
+               "--debug-gpu-vulkan-local-read",
+               CB_EX(arg_handle_debug_mode_generic_set, gpu_force_vulkan_local_read),
+               (void *)G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ);
+#  endif
   BLI_args_add(ba, nullptr, "--debug-exit-on-error", CB(arg_handle_debug_exit_on_error), nullptr);
 
   BLI_args_add(ba, nullptr, "--verbose", CB(arg_handle_verbosity_set), nullptr);