From ce0ae95ed3f3bc748bffb32e8b284047c3d115f1 Mon Sep 17 00:00:00 2001 From: Thomas Dinges Date: Mon, 21 Jul 2025 19:49:21 +0200 Subject: [PATCH] Cycles: Bump minimum supported CUDA architecture to sm_50 Pull Request: https://projects.blender.org/blender/blender/pulls/142212 --- CMakeLists.txt | 2 +- intern/cycles/blender/addon/properties.py | 2 +- intern/cycles/device/cuda/device_impl.cpp | 12 ++++---- intern/cycles/device/cuda/util.h | 2 +- intern/cycles/kernel/CMakeLists.txt | 26 +---------------- intern/cycles/kernel/device/cuda/compat.h | 9 ++---- intern/cycles/kernel/device/cuda/config.h | 35 +---------------------- 7 files changed, 13 insertions(+), 75 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb81fbbfbe4..dc3c479fa3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -668,7 +668,7 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64")) option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF) set(CYCLES_CUDA_BINARIES_ARCH - sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75 + sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75 CACHE STRING "CUDA architectures to build binaries for" ) option(WITH_CYCLES_CUDA_BUILD_SERIAL "\ diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index ae0dc47d4af..1f275f08c26 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1811,7 +1811,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): col.label(text=rpt_("No compatible GPUs found for Cycles"), icon='INFO', translate=False) if device_type == 'CUDA': - compute_capability = "3.0" + compute_capability = "5.0" col.label(text=rpt_("Requires NVIDIA GPU with compute capability %s") % compute_capability, icon='BLANK1', translate=False) elif device_type == 'OPTIX': diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp index eb85b808684..050fca03158 100644 --- a/intern/cycles/device/cuda/device_impl.cpp +++ b/intern/cycles/device/cuda/device_impl.cpp @@ -146,10 +146,10 @@ bool CUDADevice::support_device(const uint /*kernel_features*/) cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId); cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId); - /* We only support sm_30 and above */ - if (major < 3) { + /* We only support sm_50 and above */ + if (major < 5) { set_error(string_printf( - "CUDA backend requires compute capability 3.0 or up, but found %d.%d.", major, minor)); + "CUDA backend requires compute capability 5.0 or up, but found %d.%d.", major, minor)); return false; } @@ -268,7 +268,7 @@ string CUDADevice::compile_kernel(const string &common_cflags, /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */ int ptx_major = major, ptx_minor = minor; - while (ptx_major >= 3) { + while (ptx_major >= 5) { const string ptx = path_get( string_printf("lib/%s_compute_%d%d.ptx.zst", name, ptx_major, ptx_minor)); LOG_INFO << "Testing for pre-compiled kernel " << ptx << "."; @@ -309,9 +309,9 @@ string CUDADevice::compile_kernel(const string &common_cflags, # ifdef _WIN32 if (!use_adaptive_compilation() && have_precompiled_kernels()) { - if (major < 3) { + if (major < 5) { set_error( - string_printf("CUDA backend requires compute capability 3.0 or up, but found %d.%d. " + string_printf("CUDA backend requires compute capability 5.0 or up, but found %d.%d. " "Your GPU is not supported.", major, minor)); diff --git a/intern/cycles/device/cuda/util.h b/intern/cycles/device/cuda/util.h index 3a3dbe92936..c5ab23b6d8e 100644 --- a/intern/cycles/device/cuda/util.h +++ b/intern/cycles/device/cuda/util.h @@ -52,7 +52,7 @@ static inline bool cudaSupportsDevice(const int cudaDevID) { int major; cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cudaDevID); - if (major >= 3) { + if (major >= 5) { return true; } return false; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 412c91aee0e..60508f1473b 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -571,32 +571,8 @@ if(WITH_CYCLES_CUDA_BINARIES) set(prev_arch "none") foreach(arch ${CYCLES_CUDA_BINARIES_ARCH}) - if(${arch} MATCHES ".*_2.") + if(${arch} MATCHES ".*_3.") message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.") - elseif(${arch} MATCHES ".*_30") - if(DEFINED CUDA10_NVCC_EXECUTABLE) - set(cuda_nvcc_executable ${CUDA10_NVCC_EXECUTABLE}) - set(cuda_toolkit_root_dir ${CUDA10_TOOLKIT_ROOT_DIR}) - set(cuda_version 100) - elseif("${CUDA_VERSION}" LESS 110) # Support for sm_30 was removed in CUDA 11 - set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE}) - set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR}) - set(cuda_version ${CUDA_VERSION}) - else() - message(STATUS "CUDA binaries for ${arch} require CUDA 10 or earlier, skipped.") - endif() - elseif(${arch} MATCHES ".*_3.") - if(DEFINED CUDA11_NVCC_EXECUTABLE) - set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE}) - set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR}) - set(cuda_version 110) - elseif("${CUDA_VERSION}" LESS 120) # Support for sm_35, sm_37 was removed in CUDA 12 - set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE}) - set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR}) - set(cuda_version ${CUDA_VERSION}) - else() - message(STATUS "CUDA binaries for ${arch} require CUDA 11 or earlier, skipped.") - endif() elseif(${arch} MATCHES "compute_7." AND DEFINED CUDA11_NVCC_EXECUTABLE) # Use CUDA 11 if available for the default PTX kernel. This allows us to # keep the driver requirements for user machines low. diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h index fbf90efea2c..d4f9f813e4b 100644 --- a/intern/cycles/kernel/device/cuda/compat.h +++ b/intern/cycles/kernel/device/cuda/compat.h @@ -32,13 +32,8 @@ typedef unsigned long long uint64_t; #define ccl_device __device__ __inline__ #define ccl_device_extern extern "C" __device__ -#if __CUDA_ARCH__ < 500 -# define ccl_device_inline __device__ __forceinline__ -# define ccl_device_forceinline __device__ __forceinline__ -#else -# define ccl_device_inline __device__ __inline__ -# define ccl_device_forceinline __device__ __forceinline__ -#endif +#define ccl_device_inline __device__ __inline__ +#define ccl_device_forceinline __device__ __forceinline__ #define ccl_device_noinline __device__ __noinline__ #define ccl_device_noinline_cpu ccl_device #define ccl_device_inline_method ccl_device diff --git a/intern/cycles/kernel/device/cuda/config.h b/intern/cycles/kernel/device/cuda/config.h index 6506775cd9e..3459820d969 100644 --- a/intern/cycles/kernel/device/cuda/config.h +++ b/intern/cycles/kernel/device/cuda/config.h @@ -12,41 +12,8 @@ * used by each threads limits the number of threads per block. */ -/* 3.0 and 3.5 */ -#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350 -# define GPU_MULTIPRESSOR_MAX_REGISTERS 65536 -# define GPU_MULTIPROCESSOR_MAX_BLOCKS 16 -# define GPU_BLOCK_MAX_THREADS 1024 -# define GPU_THREAD_MAX_REGISTERS 63 - -/* tunable parameters */ -# define GPU_KERNEL_BLOCK_NUM_THREADS 256 -# define GPU_KERNEL_MAX_REGISTERS 63 - -/* 3.2 */ -#elif __CUDA_ARCH__ == 320 -# define GPU_MULTIPRESSOR_MAX_REGISTERS 32768 -# define GPU_MULTIPROCESSOR_MAX_BLOCKS 16 -# define GPU_BLOCK_MAX_THREADS 1024 -# define GPU_THREAD_MAX_REGISTERS 63 - -/* tunable parameters */ -# define GPU_KERNEL_BLOCK_NUM_THREADS 256 -# define GPU_KERNEL_MAX_REGISTERS 63 - -/* 3.7 */ -#elif __CUDA_ARCH__ == 370 -# define GPU_MULTIPRESSOR_MAX_REGISTERS 65536 -# define GPU_MULTIPROCESSOR_MAX_BLOCKS 16 -# define GPU_BLOCK_MAX_THREADS 1024 -# define GPU_THREAD_MAX_REGISTERS 255 - -/* tunable parameters */ -# define GPU_KERNEL_BLOCK_NUM_THREADS 256 -# define GPU_KERNEL_MAX_REGISTERS 63 - /* 5.x, 6.x */ -#elif __CUDA_ARCH__ <= 699 +#if __CUDA_ARCH__ <= 699 # define GPU_MULTIPRESSOR_MAX_REGISTERS 65536 # define GPU_MULTIPROCESSOR_MAX_BLOCKS 32 # define GPU_BLOCK_MAX_THREADS 1024