Fix: OSL shadeops module fails to load in OptiX
The OSL dependency is currently built with target sm_50, which means that LLVM defaults to generating PTX version 4.0. However, due to an apparent bug in LLVM 20, it still uses instructions that were only introduced in PTX version 6.0. As a result, OptiX refuses to load the shadeops PTX with an `OPTIX_ERROR_INVALID_INPUT` error. To fix this, raise the PTX version generated by LLVM to 6.0 for both the shadeops module (which previously used 4.0) and any generated code (which previously used 5.0), to be safe. PTX version 6.0 was introduced with CUDA 9, so it still has fairly long backwards compatibility with older drivers. This commit contains the fixes for OSL; to fully fix the original report, recompiled OSL libraries would need to land for the affected platforms. Ref #147361 Pull Request: https://projects.blender.org/blender/blender/pulls/147620
This commit is contained in:
committed by
Sergey Sharybin
parent
1621950441
commit
d9cce547e6
@@ -81,6 +81,9 @@ ExternalProject_Add(external_osl
|
||||
${PATCH_CMD} -p 1 -d
|
||||
${BUILD_DIR}/osl/src/external_osl <
|
||||
${PATCH_DIR}/osl.diff &&
|
||||
${PATCH_CMD} -p 1 -d
|
||||
${BUILD_DIR}/osl/src/external_osl <
|
||||
${PATCH_DIR}/osl_ptx_version.diff &&
|
||||
${PATCH_CMD} -p 1 -d
|
||||
${BUILD_DIR}/osl/src/external_osl <
|
||||
${PATCH_DIR}/osl_supports_isa_thread.diff
|
||||
|
||||
26
build_files/build_environment/patches/osl_ptx_version.diff
Normal file
26
build_files/build_environment/patches/osl_ptx_version.diff
Normal file
@@ -0,0 +1,26 @@
|
||||
diff --git a/src/cmake/cuda_macros.cmake b/src/cmake/cuda_macros.cmake
|
||||
index cf38234e..78c7580f 100644
|
||||
--- a/src/cmake/cuda_macros.cmake
|
||||
+++ b/src/cmake/cuda_macros.cmake
|
||||
@@ -225,7 +225,7 @@ function ( CUDA_SHADEOPS_COMPILE prefix output_bc output_ptx input_srcs headers
|
||||
add_custom_command ( OUTPUT ${linked_bc} ${linked_ptx}
|
||||
COMMAND ${LLVM_LINK_TOOL} ${shadeops_bc_list} -o ${linked_bc}
|
||||
COMMAND ${LLVM_OPT_TOOL} ${opt_tool_flags} ${linked_bc} -o ${linked_bc}
|
||||
- COMMAND ${LLVM_LLC_TOOL} --march=nvptx64 -mcpu=${CUDA_TARGET_ARCH} ${linked_bc} -o ${linked_ptx}
|
||||
+ COMMAND ${LLVM_LLC_TOOL} -march=nvptx64 -mcpu=${CUDA_TARGET_ARCH} -mattr=+ptx60 ${linked_bc} -o ${linked_ptx}
|
||||
# This script converts all of the .weak functions defined in the PTX into .visible functions.
|
||||
COMMAND ${Python3_EXECUTABLE} "${CMAKE_SOURCE_DIR}/src/build-scripts/process-ptx.py"
|
||||
${linked_ptx} ${linked_ptx}
|
||||
diff --git a/src/liboslexec/llvm_util.cpp b/src/liboslexec/llvm_util.cpp
|
||||
index ad85cec1..8da4149c 100644
|
||||
--- a/src/liboslexec/llvm_util.cpp
|
||||
+++ b/src/liboslexec/llvm_util.cpp
|
||||
@@ -1831,7 +1831,7 @@ LLVM_Util::nvptx_target_machine()
|
||||
&& "PTX compile error: LLVM Target is not initialized");
|
||||
|
||||
m_nvptx_target_machine = llvm_target->createTargetMachine(
|
||||
- ModuleTriple.str(), CUDA_TARGET_ARCH, "+ptx50", options,
|
||||
+ ModuleTriple.str(), CUDA_TARGET_ARCH, "+ptx60", options,
|
||||
llvm::Reloc::Static, llvm::CodeModel::Small,
|
||||
#if OSL_LLVM_VERSION >= 180
|
||||
llvm::CodeGenOptLevel::Default
|
||||
Reference in New Issue
Block a user