Cycles: Bump minimum supported CUDA architecture to sm_50

Pull Request: https://projects.blender.org/blender/blender/pulls/142212
Author: Thomas Dinges
Date: 2025-07-21 19:49:21 +02:00
Committed by: Thomas Dinges
Parent: d89c9c5155
Commit: ce0ae95ed3
7 changed files with 13 additions and 75 deletions


@@ -668,7 +668,7 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
   option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
   set(CYCLES_CUDA_BINARIES_ARCH
-    sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75
+    sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75
     CACHE STRING "CUDA architectures to build binaries for"
   )
   option(WITH_CYCLES_CUDA_BUILD_SERIAL "\


@@ -1811,7 +1811,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
col.label(text=rpt_("No compatible GPUs found for Cycles"), icon='INFO', translate=False)
if device_type == 'CUDA':
compute_capability = "3.0"
compute_capability = "5.0"
col.label(text=rpt_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
icon='BLANK1', translate=False)
elif device_type == 'OPTIX':


@@ -146,10 +146,10 @@ bool CUDADevice::support_device(const uint /*kernel_features*/)
cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);
/* We only support sm_30 and above */
if (major < 3) {
/* We only support sm_50 and above */
if (major < 5) {
set_error(string_printf(
"CUDA backend requires compute capability 3.0 or up, but found %d.%d.", major, minor));
"CUDA backend requires compute capability 5.0 or up, but found %d.%d.", major, minor));
return false;
}
@@ -268,7 +268,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
   /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
   int ptx_major = major, ptx_minor = minor;
-  while (ptx_major >= 3) {
+  while (ptx_major >= 5) {
     const string ptx = path_get(
         string_printf("lib/%s_compute_%d%d.ptx.zst", name, ptx_major, ptx_minor));
     LOG_INFO << "Testing for pre-compiled kernel " << ptx << ".";
@@ -309,9 +309,9 @@ string CUDADevice::compile_kernel(const string &common_cflags,
 #  ifdef _WIN32
   if (!use_adaptive_compilation() && have_precompiled_kernels()) {
-    if (major < 3) {
+    if (major < 5) {
       set_error(
-          string_printf("CUDA backend requires compute capability 3.0 or up, but found %d.%d. "
+          string_printf("CUDA backend requires compute capability 5.0 or up, but found %d.%d. "
                         "Your GPU is not supported.",
                         major,
                         minor));
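
The while (ptx_major >= 5) loop above implements the closest-match search described in the comment: since the driver can JIT-compile PTX built for an older architecture, the code walks down from the device's own compute capability until it finds a shipped PTX kernel. The following is a minimal sketch of that descent idea, not the actual Cycles loop; find_closest_ptx and path_exists are hypothetical stand-ins for the real helpers.

#include <cstdio>
#include <filesystem>
#include <string>

/* Hypothetical stand-in for the real filesystem check. */
static bool path_exists(const std::string &path)
{
  return std::filesystem::exists(path);
}

/* Walk down from the device's compute capability (e.g. 8.6) toward the
 * new minimum 5.0, returning the first pre-compiled PTX that exists. */
static std::string find_closest_ptx(const std::string &name, int major, int minor)
{
  char buf[256];
  for (int ptx_major = major; ptx_major >= 5; ptx_major--) {
    for (int ptx_minor = (ptx_major == major) ? minor : 9; ptx_minor >= 0; ptx_minor--) {
      snprintf(buf, sizeof(buf), "lib/%s_compute_%d%d.ptx.zst", name.c_str(), ptx_major, ptx_minor);
      if (path_exists(buf)) {
        return buf;  /* Closest PTX the driver can JIT-compile for this GPU. */
      }
    }
  }
  return "";  /* Nothing found; fall back to runtime compilation. */
}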


@@ -52,7 +52,7 @@ static inline bool cudaSupportsDevice(const int cudaDevID)
 {
   int major;
   cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cudaDevID);
-  if (major >= 3) {
+  if (major >= 5) {
     return true;
   }
   return false;
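
A predicate like cudaSupportsDevice() above is what lets device enumeration skip GPUs below the new minimum. As a rough, illustrative sketch (the enumeration loop is not the Cycles code, only the attribute query matches the patch), the same check can be applied across all devices with the CUDA driver API:

#include <cuda.h>
#include <cstdio>

int main()
{
  if (cuInit(0) != CUDA_SUCCESS) {
    fprintf(stderr, "Failed to initialize the CUDA driver API.\n");
    return 1;
  }
  int count = 0;
  cuDeviceGetCount(&count);
  for (int i = 0; i < count; i++) {
    int major = 0, minor = 0;
    cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, i);
    cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, i);
    /* As of this commit, devices below compute capability 5.0 (sm_50) are rejected. */
    printf("Device %d: compute capability %d.%d -> %s\n",
           i, major, minor, (major >= 5) ? "supported" : "no longer supported");
  }
  return 0;
}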


@@ -571,32 +571,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
   set(prev_arch "none")
   foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
-    if(${arch} MATCHES ".*_2.")
+    if(${arch} MATCHES ".*_3.")
       message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
-    elseif(${arch} MATCHES ".*_30")
-      if(DEFINED CUDA10_NVCC_EXECUTABLE)
-        set(cuda_nvcc_executable ${CUDA10_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA10_TOOLKIT_ROOT_DIR})
-        set(cuda_version 100)
-      elseif("${CUDA_VERSION}" LESS 110) # Support for sm_30 was removed in CUDA 11
-        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
-        set(cuda_version ${CUDA_VERSION})
-      else()
-        message(STATUS "CUDA binaries for ${arch} require CUDA 10 or earlier, skipped.")
-      endif()
-    elseif(${arch} MATCHES ".*_3.")
-      if(DEFINED CUDA11_NVCC_EXECUTABLE)
-        set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR})
-        set(cuda_version 110)
-      elseif("${CUDA_VERSION}" LESS 120) # Support for sm_35, sm_37 was removed in CUDA 12
-        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
-        set(cuda_version ${CUDA_VERSION})
-      else()
-        message(STATUS "CUDA binaries for ${arch} require CUDA 11 or earlier, skipped.")
-      endif()
     elseif(${arch} MATCHES "compute_7." AND DEFINED CUDA11_NVCC_EXECUTABLE)
       # Use CUDA 11 if available for the default PTX kernel. This allows us to
       # keep the driver requirements for user machines low.


@@ -32,13 +32,8 @@ typedef unsigned long long uint64_t;
 #define ccl_device __device__ __inline__
 #define ccl_device_extern extern "C" __device__
-#if __CUDA_ARCH__ < 500
-#  define ccl_device_inline __device__ __forceinline__
-#  define ccl_device_forceinline __device__ __forceinline__
-#else
-#  define ccl_device_inline __device__ __inline__
-#  define ccl_device_forceinline __device__ __forceinline__
-#endif
+#define ccl_device_inline __device__ __inline__
+#define ccl_device_forceinline __device__ __forceinline__
 #define ccl_device_noinline __device__ __noinline__
 #define ccl_device_noinline_cpu ccl_device
 #define ccl_device_inline_method ccl_device


@@ -12,41 +12,8 @@
  * used by each threads limits the number of threads per block.
  */

-/* 3.0 and 3.5 */
-#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
-/* 3.2 */
-#elif __CUDA_ARCH__ == 320
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 32768
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
-/* 3.7 */
-#elif __CUDA_ARCH__ == 370
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 255
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
 /* 5.x, 6.x */
-#elif __CUDA_ARCH__ <= 699
+#if __CUDA_ARCH__ <= 699
 #  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
 #  define GPU_MULTIPROCESSOR_MAX_BLOCKS 32
 #  define GPU_BLOCK_MAX_THREADS 1024