Cycles: Bump minimum supported CUDA architecture to sm_50
Pull Request: https://projects.blender.org/blender/blender/pulls/142212
committed by Thomas Dinges
parent d89c9c5155
commit ce0ae95ed3
@@ -668,7 +668,7 @@ if(NOT APPLE AND NOT (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "ARM64"))
 option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF)
 set(CYCLES_CUDA_BINARIES_ARCH
-  sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75
+  sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 sm_89 sm_120 compute_75
   CACHE STRING "CUDA architectures to build binaries for"
 )
 option(WITH_CYCLES_CUDA_BUILD_SERIAL "\
@@ -1811,7 +1811,7 @@ class CyclesPreferences(bpy.types.AddonPreferences):
     col.label(text=rpt_("No compatible GPUs found for Cycles"), icon='INFO', translate=False)

     if device_type == 'CUDA':
-        compute_capability = "3.0"
+        compute_capability = "5.0"
         col.label(text=rpt_("Requires NVIDIA GPU with compute capability %s") % compute_capability,
                   icon='BLANK1', translate=False)
     elif device_type == 'OPTIX':
@@ -146,10 +146,10 @@ bool CUDADevice::support_device(const uint /*kernel_features*/)
   cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cuDevId);
   cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, cuDevId);

-  /* We only support sm_30 and above */
-  if (major < 3) {
+  /* We only support sm_50 and above */
+  if (major < 5) {
     set_error(string_printf(
-        "CUDA backend requires compute capability 3.0 or up, but found %d.%d.", major, minor));
+        "CUDA backend requires compute capability 5.0 or up, but found %d.%d.", major, minor));
     return false;
   }
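As context for the check above (not part of this change): a minimal standalone sketch of the same compute-capability query against the CUDA driver API, applying the new sm_50 floor. Device index 0, the error handling and the messages are assumptions for illustration.

// Sketch: query compute capability with the CUDA driver API and apply the
// new sm_50 minimum, mirroring the CUDADevice::support_device() check above.
#include <cuda.h>
#include <cstdio>

int main()
{
  if (cuInit(0) != CUDA_SUCCESS) {
    std::fprintf(stderr, "cuInit failed\n");
    return 1;
  }

  CUdevice device;
  if (cuDeviceGet(&device, 0) != CUDA_SUCCESS) {  /* device index 0 assumed */
    std::fprintf(stderr, "No CUDA device found\n");
    return 1;
  }

  int major = 0, minor = 0;
  cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device);
  cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device);

  if (major < 5) {
    std::printf("Compute capability %d.%d is below the new sm_50 minimum.\n", major, minor);
    return 1;
  }
  std::printf("Compute capability %d.%d is supported.\n", major, minor);
  return 0;
}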
@@ -268,7 +268,7 @@ string CUDADevice::compile_kernel(const string &common_cflags,
 
   /* The driver can JIT-compile PTX generated for older generations, so find the closest one. */
   int ptx_major = major, ptx_minor = minor;
-  while (ptx_major >= 3) {
+  while (ptx_major >= 5) {
     const string ptx = path_get(
         string_printf("lib/%s_compute_%d%d.ptx.zst", name, ptx_major, ptx_minor));
     LOG_INFO << "Testing for pre-compiled kernel " << ptx << ".";
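The loop above now stops probing at compute capability 5.0. A hedged sketch of the same fallback idea in isolation, assuming a simple exists() helper; the minor-version step-down shown here is an illustrative simplification, not necessarily the exact iteration order used by compile_kernel().

// Sketch: probe for precompiled PTX from the device's compute capability
// downward, stopping at the new sm_50 floor. The file-name pattern follows the
// "lib/<name>_compute_<major><minor>.ptx.zst" convention from the diff above.
#include <cstdio>
#include <fstream>
#include <string>

static bool exists(const std::string &path)
{
  return std::ifstream(path).good();
}

std::string find_precompiled_ptx(const std::string &name, int major, int minor)
{
  int ptx_major = major, ptx_minor = minor;
  while (ptx_major >= 5) {
    char path[256];
    std::snprintf(path, sizeof(path), "lib/%s_compute_%d%d.ptx.zst",
                  name.c_str(), ptx_major, ptx_minor);
    if (exists(path)) {
      return path;
    }
    /* Step down: try lower minor versions first, then the previous major. */
    if (ptx_minor > 0) {
      ptx_minor--;
    }
    else {
      ptx_major--;
      ptx_minor = 9;
    }
  }
  return "";  /* no usable precompiled PTX found */
}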
@@ -309,9 +309,9 @@ string CUDADevice::compile_kernel(const string &common_cflags,
 
 # ifdef _WIN32
   if (!use_adaptive_compilation() && have_precompiled_kernels()) {
-    if (major < 3) {
+    if (major < 5) {
       set_error(
-          string_printf("CUDA backend requires compute capability 3.0 or up, but found %d.%d. "
+          string_printf("CUDA backend requires compute capability 5.0 or up, but found %d.%d. "
                         "Your GPU is not supported.",
                         major,
                         minor));
@@ -52,7 +52,7 @@ static inline bool cudaSupportsDevice(const int cudaDevID)
 {
   int major;
   cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, cudaDevID);
-  if (major >= 3) {
+  if (major >= 5) {
     return true;
   }
   return false;
@@ -571,32 +571,8 @@ if(WITH_CYCLES_CUDA_BINARIES)
 
   set(prev_arch "none")
   foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
-    if(${arch} MATCHES ".*_2.")
+    if(${arch} MATCHES ".*_3.")
       message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
-    elseif(${arch} MATCHES ".*_30")
-      if(DEFINED CUDA10_NVCC_EXECUTABLE)
-        set(cuda_nvcc_executable ${CUDA10_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA10_TOOLKIT_ROOT_DIR})
-        set(cuda_version 100)
-      elseif("${CUDA_VERSION}" LESS 110) # Support for sm_30 was removed in CUDA 11
-        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
-        set(cuda_version ${CUDA_VERSION})
-      else()
-        message(STATUS "CUDA binaries for ${arch} require CUDA 10 or earlier, skipped.")
-      endif()
-    elseif(${arch} MATCHES ".*_3.")
-      if(DEFINED CUDA11_NVCC_EXECUTABLE)
-        set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR})
-        set(cuda_version 110)
-      elseif("${CUDA_VERSION}" LESS 120) # Support for sm_35, sm_37 was removed in CUDA 12
-        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
-        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
-        set(cuda_version ${CUDA_VERSION})
-      else()
-        message(STATUS "CUDA binaries for ${arch} require CUDA 11 or earlier, skipped.")
-      endif()
     elseif(${arch} MATCHES "compute_7." AND DEFINED CUDA11_NVCC_EXECUTABLE)
       # Use CUDA 11 if available for the default PTX kernel. This allows us to
       # keep the driver requirements for user machines low.
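The comment above is about JIT requirements: PTX produced by a newer toolkit can only be JIT-compiled by a sufficiently new driver, so building the default compute_75 PTX with CUDA 11 keeps the minimum driver low. As a hedged illustration (not part of this change), the driver's supported CUDA version can be queried at runtime with cuDriverGetVersion(); the 11000 threshold below is an assumption for the example, not the exact requirement Cycles enforces.

// Sketch: report which CUDA version the installed driver supports, which
// bounds the PTX it can JIT-compile.
#include <cuda.h>
#include <cstdio>

int main()
{
  int driver_version = 0;  /* encoded as 1000 * major + 10 * minor */
  if (cuDriverGetVersion(&driver_version) != CUDA_SUCCESS) {
    std::fprintf(stderr, "Failed to query the CUDA driver version\n");
    return 1;
  }
  std::printf("Driver supports CUDA %d.%d\n",
              driver_version / 1000, (driver_version % 1000) / 10);
  if (driver_version < 11000) {  /* illustrative threshold for CUDA 11 PTX */
    std::printf("PTX built with a CUDA 11 toolkit may fail to JIT here.\n");
  }
  return 0;
}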
@@ -32,13 +32,8 @@ typedef unsigned long long uint64_t;
 #define ccl_device __device__ __inline__
 #define ccl_device_extern extern "C" __device__
-#if __CUDA_ARCH__ < 500
-#  define ccl_device_inline __device__ __forceinline__
-#  define ccl_device_forceinline __device__ __forceinline__
-#else
 #define ccl_device_inline __device__ __inline__
 #define ccl_device_forceinline __device__ __forceinline__
-#endif
 #define ccl_device_noinline __device__ __noinline__
 #define ccl_device_noinline_cpu ccl_device
 #define ccl_device_inline_method ccl_device
@@ -12,41 +12,8 @@
  * used by each threads limits the number of threads per block.
  */

-/* 3.0 and 3.5 */
-#if __CUDA_ARCH__ == 300 || __CUDA_ARCH__ == 350
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
-/* 3.2 */
-#elif __CUDA_ARCH__ == 320
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 32768
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 63
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
-/* 3.7 */
-#elif __CUDA_ARCH__ == 370
-#  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
-#  define GPU_MULTIPROCESSOR_MAX_BLOCKS 16
-#  define GPU_BLOCK_MAX_THREADS 1024
-#  define GPU_THREAD_MAX_REGISTERS 255
-
-/* tunable parameters */
-#  define GPU_KERNEL_BLOCK_NUM_THREADS 256
-#  define GPU_KERNEL_MAX_REGISTERS 63
-
 /* 5.x, 6.x */
-#elif __CUDA_ARCH__ <= 699
+#if __CUDA_ARCH__ <= 699
 #  define GPU_MULTIPRESSOR_MAX_REGISTERS 65536
 #  define GPU_MULTIPROCESSOR_MAX_BLOCKS 32
 #  define GPU_BLOCK_MAX_THREADS 1024
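The per-architecture limits that remain after this hunk bound kernel occupancy. As a hedged sketch (not Cycles' actual kernel entry macro), values like these are typically fed into __launch_bounds__ so the compiler caps register usage for the requested block size; the kernel body and the minimum-blocks expression below are assumptions for illustration.

// Sketch: consuming block/register limits via __launch_bounds__ (CUDA).
#define GPU_KERNEL_BLOCK_NUM_THREADS 256
#define GPU_KERNEL_MAX_REGISTERS 63
#define GPU_MULTIPRESSOR_MAX_REGISTERS 65536

__global__ void __launch_bounds__(GPU_KERNEL_BLOCK_NUM_THREADS,
                                  GPU_MULTIPRESSOR_MAX_REGISTERS /
                                      (GPU_KERNEL_BLOCK_NUM_THREADS * GPU_KERNEL_MAX_REGISTERS))
    example_scale_kernel(float *data, int n)
{
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    data[i] *= 2.0f;
  }
}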