diff --git a/CMakeLists.txt b/CMakeLists.txt index 068276da155..7155c15079e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -467,7 +467,6 @@ if(NOT APPLE) option(WITH_CYCLES_CUDA_BINARIES "Build Cycles NVIDIA CUDA binaries" OFF) set(CYCLES_CUDA_BINARIES_ARCH sm_30 sm_35 sm_37 sm_50 sm_52 sm_60 sm_61 sm_70 sm_75 sm_86 compute_75 CACHE STRING "CUDA architectures to build binaries for") - option(WITH_CYCLES_CUBIN_COMPILER "Build cubins with nvrtc based compiler instead of nvcc" OFF) option(WITH_CYCLES_CUDA_BUILD_SERIAL "Build cubins one after another (useful on machines with limited RAM)" OFF) option(WITH_CUDA_DYNLOAD "Dynamically load CUDA libraries at runtime (for developers, makes cuda-gdb work)" ON) @@ -475,7 +474,6 @@ if(NOT APPLE) set(CYCLES_RUNTIME_OPTIX_ROOT_DIR "" CACHE PATH "Path to the OptiX SDK root directory. When set, this path will be used at runtime to compile OptiX kernels.") mark_as_advanced(CYCLES_CUDA_BINARIES_ARCH) - mark_as_advanced(WITH_CYCLES_CUBIN_COMPILER) mark_as_advanced(WITH_CYCLES_CUDA_BUILD_SERIAL) mark_as_advanced(WITH_CUDA_DYNLOAD) mark_as_advanced(OPTIX_ROOT_DIR) diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index c6590a07ee4..22942e1fc46 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -423,7 +423,7 @@ if(WITH_CYCLES_HYDRA_RENDER_DELEGATE AND (NOT WITH_BLENDER) AND (NOT WITH_CYCLES set(CYCLES_INSTALL_PATH ${CYCLES_INSTALL_PATH}/hdCycles/resources) endif() -if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) +if(WITH_CYCLES_CUDA_BINARIES) if(MSVC) set(MAX_MSVC 1800) if(${CUDA_VERSION} EQUAL "8.0") @@ -435,24 +435,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND (NOT WITH_CYCLES_CUBIN_COMPILER)) elseif(${CUDA_VERSION} VERSION_GREATER_EQUAL 10.0) set(MAX_MSVC 1999) endif() - if(NOT MSVC_VERSION LESS ${MAX_MSVC} OR CMAKE_C_COMPILER_ID MATCHES "Clang") - message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.") - set(WITH_CYCLES_CUBIN_COMPILER ON) - endif() unset(MAX_MSVC) - elseif(APPLE) - if(NOT (${XCODE_VERSION} VERSION_LESS 10.0)) - message(STATUS "nvcc not supported for this compiler version, using cycles_cubin_cc instead.") - set(WITH_CYCLES_CUBIN_COMPILER ON) - endif() - endif() -endif() - -# NVRTC gives wrong rendering result in CUDA 10.0, so we must use NVCC. -if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_CUBIN_COMPILER AND NOT WITH_CYCLES_CUBIN_COMPILER_OVERRRIDE) - if(NOT (${CUDA_VERSION} VERSION_LESS 10.0)) - message(STATUS "cycles_cubin_cc not supported for CUDA 10.0+, using nvcc instead.") - set(WITH_CYCLES_CUBIN_COMPILER OFF) endif() endif() diff --git a/intern/cycles/app/CMakeLists.txt b/intern/cycles/app/CMakeLists.txt index 1c7a861ea93..076cada56ad 100644 --- a/intern/cycles/app/CMakeLists.txt +++ b/intern/cycles/app/CMakeLists.txt @@ -103,32 +103,3 @@ if(WITH_CYCLES_STANDALONE) $ DESTINATION ${CMAKE_INSTALL_PREFIX}) endif() - -##################################################################### -# Cycles cubin compiler executable -##################################################################### - -if(WITH_CYCLES_CUBIN_COMPILER) - # 32 bit windows is special, nvrtc is not supported on x86, so even - # though we are building 32 bit blender a 64 bit cubin_cc will have - # to be build to compile the cubins. - if(MSVC AND NOT CMAKE_CL_64) - message("Building with CUDA not supported on 32 bit, skipped") - set(WITH_CYCLES_CUDA_BINARIES OFF CACHE BOOL "" FORCE) - else() - set(SRC - cycles_cubin_cc.cpp - ) - set(INC - ../../../extern/cuew/include - ) - set(LIB - ) - cycles_external_libraries_append(LIB) - add_executable(cycles_cubin_cc ${SRC}) - include_directories(${INC}) - target_link_libraries(cycles_cubin_cc PRIVATE ${LIB}) - unset(SRC) - unset(INC) - endif() -endif() diff --git a/intern/cycles/app/cycles_cubin_cc.cpp b/intern/cycles/app/cycles_cubin_cc.cpp deleted file mode 100644 index 3b57dd83a5c..00000000000 --- a/intern/cycles/app/cycles_cubin_cc.cpp +++ /dev/null @@ -1,311 +0,0 @@ -/* SPDX-License-Identifier: Apache-2.0 - * Copyright 2017-2022 Blender Foundation */ - -#include -#include - -#include -#include - -#include -#include - -#include "cuew.h" - -#ifdef _MSC_VER -# include -#endif - -using std::string; -using std::vector; - -namespace std { -template std::string to_string(const T &n) -{ - std::ostringstream s; - s << n; - return s.str(); -} -} // namespace std - -class CompilationSettings { - public: - CompilationSettings() - : target_arch(0), bits(64), verbose(false), fast_math(false), ptx_only(false) - { - } - - string cuda_toolkit_dir; - string input_file; - string output_file; - string ptx_file; - vector defines; - vector includes; - int target_arch; - int bits; - bool verbose; - bool fast_math; - bool ptx_only; -}; - -static bool compile_cuda(CompilationSettings &settings) -{ - const char *headers[] = {"stdlib.h", "float.h", "math.h", "stdio.h", "stddef.h"}; - const char *header_content[] = {"\n", "\n", "\n", "\n", "\n"}; - - printf("Building %s\n", settings.input_file.c_str()); - - string code; - if (!OIIO::Filesystem::read_text_file(settings.input_file, code)) { - fprintf(stderr, "Error: unable to read %s\n", settings.input_file.c_str()); - return false; - } - - vector options; - for (size_t i = 0; i < settings.includes.size(); i++) { - options.push_back("-I" + settings.includes[i]); - } - - for (size_t i = 0; i < settings.defines.size(); i++) { - options.push_back("-D" + settings.defines[i]); - } - options.push_back("-D__KERNEL_CUDA_VERSION__=" + std::to_string(cuewNvrtcVersion())); - options.push_back("-arch=compute_" + std::to_string(settings.target_arch)); - options.push_back("--device-as-default-execution-space"); - options.push_back("-DCYCLES_CUBIN_CC"); - options.push_back("--std=c++11"); - if (settings.fast_math) - options.push_back("--use_fast_math"); - - nvrtcProgram prog; - nvrtcResult result = nvrtcCreateProgram(&prog, - code.c_str(), // buffer - NULL, // name - sizeof(headers) / sizeof(void *), // numHeaders - header_content, // headers - headers); // includeNames - - if (result != NVRTC_SUCCESS) { - fprintf(stderr, "Error: nvrtcCreateProgram failed (%d)\n\n", (int)result); - return false; - } - - /* Transfer options to a classic C array. */ - vector opts(options.size()); - for (size_t i = 0; i < options.size(); i++) { - opts[i] = options[i].c_str(); - } - - result = nvrtcCompileProgram(prog, options.size(), &opts[0]); - - if (result != NVRTC_SUCCESS) { - fprintf(stderr, "Error: nvrtcCompileProgram failed (%d)\n\n", (int)result); - - size_t log_size; - nvrtcGetProgramLogSize(prog, &log_size); - - vector log(log_size); - nvrtcGetProgramLog(prog, &log[0]); - fprintf(stderr, "%s\n", &log[0]); - - return false; - } - - /* Retrieve the ptx code. */ - size_t ptx_size; - result = nvrtcGetPTXSize(prog, &ptx_size); - if (result != NVRTC_SUCCESS) { - fprintf(stderr, "Error: nvrtcGetPTXSize failed (%d)\n\n", (int)result); - return false; - } - - vector ptx_code(ptx_size); - result = nvrtcGetPTX(prog, &ptx_code[0]); - if (result != NVRTC_SUCCESS) { - fprintf(stderr, "Error: nvrtcGetPTX failed (%d)\n\n", (int)result); - return false; - } - if (settings.ptx_only) { - settings.ptx_file = settings.output_file; - } - else { - /* Write a file in the temp folder with the ptx code. */ - settings.ptx_file = OIIO::Filesystem::temp_directory_path() + "/" + - OIIO::Filesystem::unique_path(); - } - FILE *f = fopen(settings.ptx_file.c_str(), "wb"); - fwrite(&ptx_code[0], 1, ptx_size, f); - fclose(f); - - return true; -} - -static bool link_ptxas(CompilationSettings &settings) -{ - string cudapath = ""; - if (settings.cuda_toolkit_dir.size()) - cudapath = settings.cuda_toolkit_dir + "/bin/"; - - string ptx = "\"" + cudapath + "ptxas\" " + settings.ptx_file + " -o " + settings.output_file + - " --gpu-name sm_" + std::to_string(settings.target_arch) + " -m" + - std::to_string(settings.bits); - - if (settings.verbose) { - ptx += " --verbose"; - printf("%s\n", ptx.c_str()); - } - - int pxresult = system(ptx.c_str()); - if (pxresult) { - fprintf(stderr, "Error: ptxas failed (%d)\n\n", pxresult); - return false; - } - - if (!OIIO::Filesystem::remove(settings.ptx_file)) { - fprintf(stderr, "Error: removing %s\n\n", settings.ptx_file.c_str()); - } - - return true; -} - -static bool init(CompilationSettings &settings) -{ -#ifdef _MSC_VER - if (settings.cuda_toolkit_dir.size()) { - SetDllDirectory((settings.cuda_toolkit_dir + "/bin").c_str()); - } -#else - (void)settings; -#endif - - int cuewresult = cuewInit(CUEW_INIT_NVRTC); - if (cuewresult != CUEW_SUCCESS) { - fprintf(stderr, "Error: cuew init fialed (0x%d)\n\n", cuewresult); - return false; - } - - if (cuewNvrtcVersion() < 80) { - fprintf(stderr, "Error: only cuda 8 and higher is supported, %d\n\n", cuewCompilerVersion()); - return false; - } - - if (!nvrtcCreateProgram) { - fprintf(stderr, "Error: nvrtcCreateProgram not resolved\n"); - return false; - } - - if (!nvrtcCompileProgram) { - fprintf(stderr, "Error: nvrtcCompileProgram not resolved\n"); - return false; - } - - if (!nvrtcGetProgramLogSize) { - fprintf(stderr, "Error: nvrtcGetProgramLogSize not resolved\n"); - return false; - } - - if (!nvrtcGetProgramLog) { - fprintf(stderr, "Error: nvrtcGetProgramLog not resolved\n"); - return false; - } - - if (!nvrtcGetPTXSize) { - fprintf(stderr, "Error: nvrtcGetPTXSize not resolved\n"); - return false; - } - - if (!nvrtcGetPTX) { - fprintf(stderr, "Error: nvrtcGetPTX not resolved\n"); - return false; - } - - return true; -} - -static bool parse_parameters(int argc, const char **argv, CompilationSettings &settings) -{ - OIIO::ArgParse ap; - ap.options("Usage: cycles_cubin_cc [options]", - "-target %d", - &settings.target_arch, - "target shader model", - "-m %d", - &settings.bits, - "Cuda architecture bits", - "-i %s", - &settings.input_file, - "Input source filename", - "-o %s", - &settings.output_file, - "Output cubin filename", - "-I %L", - &settings.includes, - "Add additional includepath", - "-D %L", - &settings.defines, - "Add additional defines", - "-ptx", - &settings.ptx_only, - "emit PTX code", - "-v", - &settings.verbose, - "Use verbose logging", - "--use_fast_math", - &settings.fast_math, - "Use fast math", - "-cuda-toolkit-dir %s", - &settings.cuda_toolkit_dir, - "path to the cuda toolkit binary directory", - NULL); - - if (ap.parse(argc, argv) < 0) { - fprintf(stderr, "%s\n", ap.geterror().c_str()); - ap.usage(); - return false; - } - - if (!settings.output_file.size()) { - fprintf(stderr, "Error: Output file not set(-o), required\n\n"); - return false; - } - - if (!settings.input_file.size()) { - fprintf(stderr, "Error: Input file not set(-i, required\n\n"); - return false; - } - - if (!settings.target_arch) { - fprintf(stderr, "Error: target shader model not set (-target), required\n\n"); - return false; - } - - return true; -} - -int main(int argc, const char **argv) -{ - CompilationSettings settings; - - if (!parse_parameters(argc, argv, settings)) { - fprintf(stderr, "Error: invalid parameters, exiting\n"); - exit(EXIT_FAILURE); - } - - if (!init(settings)) { - fprintf(stderr, "Error: initialization error, exiting\n"); - exit(EXIT_FAILURE); - } - - if (!compile_cuda(settings)) { - fprintf(stderr, "Error: compilation error, exiting\n"); - exit(EXIT_FAILURE); - } - - if (!settings.ptx_only) { - if (!link_ptxas(settings)) { - exit(EXIT_FAILURE); - } - } - - return 0; -} diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 99f9e536977..64f7214530f 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -468,52 +468,27 @@ if(WITH_CYCLES_CUDA_BINARIES) set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG) endif() - if(WITH_CYCLES_CUBIN_COMPILER) - string(SUBSTRING ${arch} 3 -1 CUDA_ARCH) - - # Needed to find libnvrtc-builtins.so. Can't do it from inside - # cycles_cubin_cc since the env variable is read before main() - if(APPLE) - set(CUBIN_CC_ENV ${CMAKE_COMMAND} - -E env DYLD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib") - elseif(UNIX) - set(CUBIN_CC_ENV ${CMAKE_COMMAND} - -E env LD_LIBRARY_PATH="${cuda_toolkit_root_dir}/lib64") - endif() + set(_cuda_nvcc_args + -arch=${arch} + ${CUDA_NVCC_FLAGS} + --${format} + ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} + --ptxas-options="-v" + ${cuda_flags}) + if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM) add_custom_command( OUTPUT ${cuda_file} - COMMAND ${CUBIN_CC_ENV} - "$" - -target ${CUDA_ARCH} - -i ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} - ${cuda_flags} - -v - -cuda-toolkit-dir "${cuda_toolkit_root_dir}" - DEPENDS ${kernel_sources} cycles_cubin_cc) + COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args} + DEPENDS ${kernel_sources}) else() - set(_cuda_nvcc_args - -arch=${arch} - ${CUDA_NVCC_FLAGS} - --${format} - ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src} - --ptxas-options="-v" - ${cuda_flags}) - - if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM) - add_custom_command( - OUTPUT ${cuda_file} - COMMAND ${CCACHE_PROGRAM} ${cuda_nvcc_executable} ${_cuda_nvcc_args} - DEPENDS ${kernel_sources}) - else() - add_custom_command( - OUTPUT ${cuda_file} - COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args} - DEPENDS ${kernel_sources}) - endif() - - unset(_cuda_nvcc_args) + add_custom_command( + OUTPUT ${cuda_file} + COMMAND ${cuda_nvcc_executable} ${_cuda_nvcc_args} + DEPENDS ${kernel_sources}) endif() + + unset(_cuda_nvcc_args) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib) list(APPEND cuda_cubins ${cuda_file}) @@ -665,55 +640,25 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) set(cuda_flags ${cuda_flags} -D WITH_CYCLES_DEBUG) endif() - if(WITH_CYCLES_CUBIN_COMPILER) - # Needed to find libnvrtc-builtins.so. Can't do it from inside - # cycles_cubin_cc since the env variable is read before main() - if(APPLE) - set(CUBIN_CC_ENV ${CMAKE_COMMAND} - -E env DYLD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib") - elseif(UNIX) - set(CUBIN_CC_ENV ${CMAKE_COMMAND} - -E env LD_LIBRARY_PATH="${CUDA_TOOLKIT_ROOT_DIR}/lib64") - endif() + add_custom_command( + OUTPUT + ${output} + DEPENDS + ${input} + ${SRC_KERNEL_HEADERS} + ${SRC_KERNEL_DEVICE_GPU_HEADERS} + ${SRC_KERNEL_DEVICE_CUDA_HEADERS} + ${SRC_KERNEL_DEVICE_OPTIX_HEADERS} + ${SRC_UTIL_HEADERS} + COMMAND + ${CUDA_NVCC_EXECUTABLE} + --ptx + -arch=sm_50 + ${cuda_flags} + ${input} + WORKING_DIRECTORY + "${CMAKE_CURRENT_SOURCE_DIR}") - add_custom_command( - OUTPUT ${output} - DEPENDS - ${input} - ${SRC_KERNEL_HEADERS} - ${SRC_KERNEL_DEVICE_GPU_HEADERS} - ${SRC_KERNEL_DEVICE_CUDA_HEADERS} - ${SRC_KERNEL_DEVICE_OPTIX_HEADERS} - ${SRC_UTIL_HEADERS} - COMMAND ${CUBIN_CC_ENV} - "$" - -target 50 - -ptx - -i ${CMAKE_CURRENT_SOURCE_DIR}/${input} - ${cuda_flags} - -v - -cuda-toolkit-dir "${CUDA_TOOLKIT_ROOT_DIR}" - DEPENDS ${kernel_sources} cycles_cubin_cc) - else() - add_custom_command( - OUTPUT - ${output} - DEPENDS - ${input} - ${SRC_KERNEL_HEADERS} - ${SRC_KERNEL_DEVICE_GPU_HEADERS} - ${SRC_KERNEL_DEVICE_CUDA_HEADERS} - ${SRC_KERNEL_DEVICE_OPTIX_HEADERS} - ${SRC_UTIL_HEADERS} - COMMAND - ${CUDA_NVCC_EXECUTABLE} - --ptx - -arch=sm_50 - ${cuda_flags} - ${input} - WORKING_DIRECTORY - "${CMAKE_CURRENT_SOURCE_DIR}") - endif() list(APPEND optix_ptx ${output}) delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib)