Cycles: Bump minimum supported CUDA architecture to sm_50

Pull Request: https://projects.blender.org/blender/blender/pulls/142212
Author: Thomas Dinges
Date: 2025-07-21 19:49:21 +02:00
Committed by: Thomas Dinges
Parent: d89c9c5155
Commit: ce0ae95ed3
7 changed files with 13 additions and 75 deletions


@@ -668,7 +668,7 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
   option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
   set(CYCLES_CUDA_BINARIES_ARCH
-    sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75
+    sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75
     CACHE STRING "CUDA architectures to build binaries for"
   )
   option(WITH_CYCLES_CUDA_BUILD_SERIAL "\


@@ -1811,7 +1811,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text=rpt_("No compatible GPUs found for Cycles"), icon='INFO', translate=False)
if device_type == 'CUDA':
compute_capability = "3.0"
compute_capability = "5.0"
col.label(text=rpt_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
icon='BLANK1', translate=False)
elif device_type == 'OPTIX':


@@ -146,10 +146,10 @@ bool CUDADevice::support_device(const uint /*kernel_features*/)
cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
/* We only support sm_30 and above */
if (major < 3) {
/* We only support sm_50 and above */
if (major < 5) {
set_error(string_printf(
"CUDA backend requires compute capability 3.0 or up, but found %d.%d.", major, minor));
"CUDA backend requires compute capability 5.0 or up, but found %d.%d.", major, minor));
return false;
}
@@ -268,7 +268,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
   /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
   int ptx_major = major, ptx_minor = minor;
-  while (ptx_major >= 3) {
+  while (ptx_major >= 5) {
     const string ptx = path_get(
         string_printf("lib/%s_compute_%d%d.ptx.zst", name, ptx_major, ptx_minor));
     LOG_INFO << "Testing for pre-compiled kernel " << ptx << ".";
@@ -309,9 +309,9 @@ string CUDADevice::compile_kernel(const string &common_cflags,
 #  ifdef _WIN32
   if (!use_adaptive_compilation() && have_precompiled_kernels()) {
-    if (major < 3) {
+    if (major < 5) {
       set_error(
-          string_printf("CUDA backend requires compute capability 3.0 or up, but found %d.%d. "
+          string_printf("CUDA backend requires compute capability 5.0 or up, but found %d.%d. "
                         "Your GPU is not supported.",
                         major,
                         minor));
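
The while (ptx_major >= 5) loop above implements the closest-match search described in the comment: since the driver can JIT-compile PTX built for an older architecture, the code walks down from the device's own compute capability until it finds a shipped PTX kernel. The following is a minimal sketch of that descent idea, not the actual Cycles loop; find_closest_ptx and path_exists are hypothetical stand-ins for the real helpers.

#include <cstdio>
#include <filesystem>
#include <string>

/* Hypothetical stand-in for the real filesystem check. */
static bool path_exists(const std::string &path)
{
  return std::filesystem::exists(path);
}

/* Walk down from the device's compute capability (e.g. 8.6) toward the
 * new minimum 5.0, returning the first pre-compiled PTX that exists. */
static std::string find_closest_ptx(const std::string &name, int major, int minor)
{
  char buf[256];
  for (int ptx_major = major; ptx_major >= 5; ptx_major--) {
    for (int ptx_minor = (ptx_major == major) ? minor : 9; ptx_minor >= 0; ptx_minor--) {
      snprintf(buf, sizeof(buf), "lib/%s_compute_%d%d.ptx.zst", name.c_str(), ptx_major, ptx_minor);
      if (path_exists(buf)) {
        return buf;  /* Closest PTX the driver can JIT-compile for this GPU. */
      }
    }
  }
  return "";  /* Nothing found; fall back to runtime compilation. */
}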


@@ -52,7 +52,7 @@ static inline bool cudaSupportsDevice(const int cudaDevID)
 {
   int major;
   cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cudaDevID);
-  if (major >= 3) {
+  if (major >= 5) {
     return true;
   }
   return false;
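
A predicate like cudaSupportsDevice() above is what lets device enumeration skip GPUs below the new minimum. As a rough, illustrative sketch (the enumeration loop is not the Cycles code, only the attribute query matches the patch), the same check can be applied across all devices with the CUDA driver API:

#include <cuda.h>
#include <cstdio>

int main()
{
  if (cuInit(0) != CUDA_SUCCESS) {
    fprintf(stderr, "Failed to initialize the CUDA driver API.\n");
    return 1;
  }
  int count = 0;
  cuDeviceGetCount(&count);
  for (int i = 0; i < count; i++) {
    int major = 0, minor = 0;
    cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, i);
    cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, i);
    /* As of this commit, devices below compute capability 5.0 (sm_50) are rejected. */
    printf("Device %d: compute capability %d.%d -> %s\n",
           i, major, minor, (major >= 5) ? "supported" : "no longer supported");
  }
  return 0;
}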


@@ -571,32 +571,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
   set(prev_arch "none")
   foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
-    if(${arch} MATCHES ".*_2.")
+    if(${arch} MATCHES ".*_3.")
       message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
-    elseif(${arch} MATCHES ".*_30")
-      if(DEFINED CUDA10_NVCC_EXECUTABLE)
-        set(cuda_nvcc_executable ${CUDA10_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA10_TOOLKIT_ROOT_DIR})
-        set(cuda_version 100)
-      elseif("${CUDA_VERSION}" LESS 110) # Support for sm_30 was removed in CUDA 11
-        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
-        set(cuda_version ${CUDA_VERSION})
-      else()
-        message(STATUS "CUDA binaries for ${arch} require CUDA 10 or earlier, skipped.")
-      endif()
-    elseif(${arch} MATCHES ".*_3.")
-      if(DEFINED CUDA11_NVCC_EXECUTABLE)
-        set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR})
-        set(cuda_version 110)
-      elseif("${CUDA_VERSION}" LESS 120) # Support for sm_35, sm_37 was removed in CUDA 12
-        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
-        set(cuda_version ${CUDA_VERSION})
-      else()
-        message(STATUS "CUDA binaries for ${arch} require CUDA 11 or earlier, skipped.")
-      endif()
     elseif(${arch} MATCHES "compute_7." AND DEFINED CUDA11_NVCC_EXECUTABLE)
       # Use CUDA 11 if available for the default PTX kernel. This allows us to
       # keep the driver requirements for user machines low.


@@ -32,13 +32,8 @@ typedef unsigned long long uint64_t;
 #define ccl_device __device__ __inline__
 #define ccl_device_extern extern "C" __device__
-#if __CUDA_ARCH__ < 500
-#  define ccl_device_inline __device__ __forceinline__
-#  define ccl_device_forceinline __device__ __forceinline__
-#else
-#  define ccl_device_inline __device__ __inline__
-#  define ccl_device_forceinline __device__ __forceinline__
-#endif
+#define ccl_device_inline __device__ __inline__
+#define ccl_device_forceinline __device__ __forceinline__
 #define ccl_device_noinline __device__ __noinline__
 #define ccl_device_noinline_cpu ccl_device
 #define ccl_device_inline_method ccl_device


@@ -12,41 +12,8 @@
  * used by each threads limits the number of threads per block.
  */

-/* 3.0 and 3.5 */
-#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
-/* 3.2 */
-#elif __CUDA_ARCH__ == 320
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 32768
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
-/* 3.7 */
-#elif __CUDA_ARCH__ == 370
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 255
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
 /* 5.x, 6.x */
-#elif __CUDA_ARCH__ <= 699
+#if __CUDA_ARCH__ <= 699
 #  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
 #  define GPU_MULTIPROCESSOR_MAX_BLOCKS 32
 #  define GPU_BLOCK_MAX_THREADS 1024