Vulkan: Disable local read on non-Qualcomm devices

Only enable dynamic rendering local read by default on Qualcomm devices. On NVIDIA, AMD and Intel devices performance is better (20%) when it is disabled. On Qualcomm devices the improvement from enabling it can be substantial (16% on shader_balls.blend). `--debug-gpu-vulkan-local-read` can be used to force dynamic rendering local read on any platform that supports it. Future: Check if the bottleneck is during command building. If so, we could fine-tune this after the device command building has landed (#T132682). Pull Request: https://projects.blender.org/blender/blender/pulls/132981
2025-01-13 09:29:16 +01:00
parent 75dc76bceb
commit a4914b8972
3 changed files with 39 additions and 10 deletions
--- a/source/blender/blenkernel/BKE_global.hh
+++ b/source/blender/blenkernel/BKE_global.hh
@@ -259,17 +259,18 @@ enum {
                                             * assigned to ID datablocks */
  G_DEBUG_DEPSGRAPH = (G_DEBUG_DEPSGRAPH_BUILD | G_DEBUG_DEPSGRAPH_EVAL | G_DEBUG_DEPSGRAPH_TAG |
                       G_DEBUG_DEPSGRAPH_TIME | G_DEBUG_DEPSGRAPH_UID),
-  G_DEBUG_SIMDATA = (1 << 15),               /* sim debug data display */
-  G_DEBUG_GPU = (1 << 16),                   /* gpu debug */
-  G_DEBUG_IO = (1 << 17),                    /* IO Debugging (for Collada, ...). */
-  G_DEBUG_GPU_FORCE_WORKAROUNDS = (1 << 18), /* Force GPU workarounds bypassing detection. */
-  G_DEBUG_GPU_COMPILE_SHADERS = (1 << 19),   /* Compile all statically defined shaders. . */
-  G_DEBUG_GPU_RENDERDOC = (1 << 20),         /* Enable RenderDoc integration. */
-  G_DEBUG_XR = (1 << 21),                    /* XR/OpenXR messages */
-  G_DEBUG_XR_TIME = (1 << 22),               /* XR/OpenXR timing messages */
+  G_DEBUG_SIMDATA = (1 << 15),                     /* sim debug data display */
+  G_DEBUG_GPU = (1 << 16),                         /* gpu debug */
+  G_DEBUG_IO = (1 << 17),                          /* IO Debugging (for Collada, ...). */
+  G_DEBUG_GPU_FORCE_WORKAROUNDS = (1 << 18),       /* Force GPU workarounds bypassing detection. */
+  G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ = (1 << 19), /* Force GPU dynamic rendering local read. */
+  G_DEBUG_GPU_COMPILE_SHADERS = (1 << 20),         /* Compile all statically defined shaders. */
+  G_DEBUG_GPU_RENDERDOC = (1 << 21),               /* Enable RenderDoc integration. */
+  G_DEBUG_XR = (1 << 22),                          /* XR/OpenXR messages */
+  G_DEBUG_XR_TIME = (1 << 23),                     /* XR/OpenXR timing messages */

-  G_DEBUG_GHOST = (1 << 23),  /* Debug GHOST module. */
-  G_DEBUG_WINTAB = (1 << 24), /* Debug Wintab. */
+  G_DEBUG_GHOST = (1 << 24),  /* Debug GHOST module. */
+  G_DEBUG_WINTAB = (1 << 25), /* Debug Wintab. */
 };

 #define G_DEBUG_ALL \
--- a/source/blender/gpu/vulkan/vk_backend.cc
+++ b/source/blender/gpu/vulkan/vk_backend.cc
@@ -386,6 +386,22 @@ void VKBackend::detect_workarounds(VKDevice &device)
    workarounds.not_aligned_pixel_formats = true;
  }

+  /* Only enable dynamic rendering local read by default on Qualcomm devices. On NVIDIA, AMD and
+   * Intel devices performance is better (20%) when it is disabled. On Qualcomm devices the
+   * improvement from enabling it can be substantial (16% on shader_balls.blend).
+   *
+   * `--debug-gpu-vulkan-local-read` can be used to force dynamic rendering local read on any
+   * platform that supports it.
+   *
+   * TODO: Check if the bottleneck is during command building. If so we could fine-tune this after
+   * the device command building has landed (T132682).
+   */
+  if ((G.debug & G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ) == 0 &&
+      !GPU_type_matches(GPU_DEVICE_QUALCOMM, GPU_OS_ANY, GPU_DRIVER_ANY))
+  {
+    workarounds.dynamic_rendering_local_read = true;
+  }
+
  VkFormatProperties format_properties = {};
  vkGetPhysicalDeviceFormatProperties(
      device.physical_device_get(), VK_FORMAT_R8G8B8_UNORM, &format_properties);
--- a/source/creator/creator_args.cc
+++ b/source/creator/creator_args.cc
@@ -1347,6 +1347,11 @@ static const char arg_handle_debug_mode_generic_set_doc_depsgraph_uid[] =
 static const char arg_handle_debug_mode_generic_set_doc_gpu_force_workarounds[] =
    "\n\t"
    "Enable workarounds for typical GPU issues and disable all GPU extensions.";
+#  ifdef WITH_VULKAN_BACKEND
+static const char arg_handle_debug_mode_generic_set_doc_gpu_force_vulkan_local_read[] =
+    "\n\t"
+    "Force Vulkan dynamic rendering local read when supported by device.";
+#  endif

 static int arg_handle_debug_mode_generic_set(int /*argc*/, const char ** /*argv*/, void *data)
 {
@@ -2853,6 +2858,13 @@ void main_args_setup(bContext *C, bArgs *ba, bool all)
               "--debug-gpu-force-workarounds",
               CB_EX(arg_handle_debug_mode_generic_set, gpu_force_workarounds),
               (void *)G_DEBUG_GPU_FORCE_WORKAROUNDS);
+#  ifdef WITH_VULKAN_BACKEND
+  BLI_args_add(ba,
+               nullptr,
+               "--debug-gpu-vulkan-local-read",
+               CB_EX(arg_handle_debug_mode_generic_set, gpu_force_vulkan_local_read),
+               (void *)G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ);
+#  endif
  BLI_args_add(ba, nullptr, "--debug-exit-on-error", CB(arg_handle_debug_exit_on_error), nullptr);

  BLI_args_add(ba, nullptr, "--verbose", CB(arg_handle_verbosity_set), nullptr);