From a4914b8972a4b6840b3ba7fd3292bba934defee5 Mon Sep 17 00:00:00 2001 From: Jeroen Bakker Date: Mon, 13 Jan 2025 09:29:16 +0100 Subject: [PATCH] Vulkan: Disable local read on non-Qualcomm devices Enable dynamic rendering local read by default only on Qualcomm devices. NVIDIA, AMD and Intel devices perform better (20%) when it is disabled. On Qualcomm devices the improvement from enabling it can be substantial (16% on shader_balls.blend). `--debug-gpu-vulkan-local-read` can be used to force dynamic rendering local read on any supported platform. Future: Check whether the bottleneck is in command building. If so, we could fine-tune this after device command building has landed (#T132682). Pull Request: https://projects.blender.org/blender/blender/pulls/132981 --- source/blender/blenkernel/BKE_global.hh | 21 +++++++++++---------- source/blender/gpu/vulkan/vk_backend.cc | 16 ++++++++++++++++ source/creator/creator_args.cc | 12 ++++++++++++ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/source/blender/blenkernel/BKE_global.hh b/source/blender/blenkernel/BKE_global.hh index b8d713e5d3f..9f5d000e86e 100644 --- a/source/blender/blenkernel/BKE_global.hh +++ b/source/blender/blenkernel/BKE_global.hh @@ -259,17 +259,18 @@ enum { * assigned to ID datablocks */ G_DEBUG_DEPSGRAPH = (G_DEBUG_DEPSGRAPH_BUILD | G_DEBUG_DEPSGRAPH_EVAL | G_DEBUG_DEPSGRAPH_TAG | G_DEBUG_DEPSGRAPH_TIME | G_DEBUG_DEPSGRAPH_UID), - G_DEBUG_SIMDATA = (1 << 15), /* sim debug data display */ - G_DEBUG_GPU = (1 << 16), /* gpu debug */ - G_DEBUG_IO = (1 << 17), /* IO Debugging (for Collada, ...). */ - G_DEBUG_GPU_FORCE_WORKAROUNDS = (1 << 18), /* Force GPU workarounds bypassing detection. */ - G_DEBUG_GPU_COMPILE_SHADERS = (1 << 19), /* Compile all statically defined shaders. . */ - G_DEBUG_GPU_RENDERDOC = (1 << 20), /* Enable RenderDoc integration. 
*/ - G_DEBUG_XR = (1 << 21), /* XR/OpenXR messages */ - G_DEBUG_XR_TIME = (1 << 22), /* XR/OpenXR timing messages */ + G_DEBUG_SIMDATA = (1 << 15), /* sim debug data display */ + G_DEBUG_GPU = (1 << 16), /* gpu debug */ + G_DEBUG_IO = (1 << 17), /* IO Debugging (for Collada, ...). */ + G_DEBUG_GPU_FORCE_WORKAROUNDS = (1 << 18), /* Force GPU workarounds bypassing detection. */ + G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ = (1 << 19), /* Force GPU dynamic rendering local read. */ + G_DEBUG_GPU_COMPILE_SHADERS = (1 << 20), /* Compile all statically defined shaders. . */ + G_DEBUG_GPU_RENDERDOC = (1 << 21), /* Enable RenderDoc integration. */ + G_DEBUG_XR = (1 << 22), /* XR/OpenXR messages */ + G_DEBUG_XR_TIME = (1 << 23), /* XR/OpenXR timing messages */ - G_DEBUG_GHOST = (1 << 23), /* Debug GHOST module. */ - G_DEBUG_WINTAB = (1 << 24), /* Debug Wintab. */ + G_DEBUG_GHOST = (1 << 24), /* Debug GHOST module. */ + G_DEBUG_WINTAB = (1 << 25), /* Debug Wintab. */ }; #define G_DEBUG_ALL \ diff --git a/source/blender/gpu/vulkan/vk_backend.cc b/source/blender/gpu/vulkan/vk_backend.cc index 026bab6cdb3..b29960a1b64 100644 --- a/source/blender/gpu/vulkan/vk_backend.cc +++ b/source/blender/gpu/vulkan/vk_backend.cc @@ -386,6 +386,22 @@ void VKBackend::detect_workarounds(VKDevice &device) workarounds.not_aligned_pixel_formats = true; } + /* Only enable by default dynamic rendering local read on Qualcomm devices. NVIDIA, AMD and Intel + * performance is better when disabled (20%). On Qualcomm devices the improvement can be + * substantial (16% on shader_balls.blend). + * + * `--debug-gpu-vulkan-local-read` can be used to use dynamic rendering local read on any + * supported platform. + * + * TODO: Check if bottleneck is during command building. If so we could fine-tune this after the + * device command building landed (T132682). 
+ */ + if ((G.debug & G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ) == 0 && + !GPU_type_matches(GPU_DEVICE_QUALCOMM, GPU_OS_ANY, GPU_DRIVER_ANY)) + { + workarounds.dynamic_rendering_local_read = true; + } + VkFormatProperties format_properties = {}; vkGetPhysicalDeviceFormatProperties( device.physical_device_get(), VK_FORMAT_R8G8B8_UNORM, &format_properties); diff --git a/source/creator/creator_args.cc b/source/creator/creator_args.cc index 1052b6b002c..98da177ead9 100644 --- a/source/creator/creator_args.cc +++ b/source/creator/creator_args.cc @@ -1347,6 +1347,11 @@ static const char arg_handle_debug_mode_generic_set_doc_depsgraph_uid[] = static const char arg_handle_debug_mode_generic_set_doc_gpu_force_workarounds[] = "\n\t" "Enable workarounds for typical GPU issues and disable all GPU extensions."; +# ifdef WITH_VULKAN_BACKEND +static const char arg_handle_debug_mode_generic_set_doc_gpu_force_vulkan_local_read[] = + "\n\t" + "Force Vulkan dynamic rendering local read when supported by device."; +# endif static int arg_handle_debug_mode_generic_set(int /*argc*/, const char ** /*argv*/, void *data) { @@ -2853,6 +2858,13 @@ void main_args_setup(bContext *C, bArgs *ba, bool all) "--debug-gpu-force-workarounds", CB_EX(arg_handle_debug_mode_generic_set, gpu_force_workarounds), (void *)G_DEBUG_GPU_FORCE_WORKAROUNDS); +# ifdef WITH_VULKAN_BACKEND + BLI_args_add(ba, + nullptr, + "--debug-gpu-vulkan-local-read", + CB_EX(arg_handle_debug_mode_generic_set, gpu_force_vulkan_local_read), + (void *)G_DEBUG_GPU_FORCE_VULKAN_LOCAL_READ); +# endif BLI_args_add(ba, nullptr, "--debug-exit-on-error", CB(arg_handle_debug_exit_on_error), nullptr); BLI_args_add(ba, nullptr, "--verbose", CB(arg_handle_verbosity_set), nullptr);