The oneAPI implementation currently performs texture operations in software, which adds register pressure during the GPU compiler's register allocation and leaves extra code to maintain. This commit addresses both problems by using the recently productized SYCL bindless texture API to perform texture operations on the hardware sampler of Intel GPUs. This currently translates to 1-11% rendering speedups (scene-specific) on my Arc A770 and Arc B580. At the moment, there are small performance regressions with NanoVDB texture operations on Arc B580 and in the shade surface MNEE and Raytrace kernels on Arc A770, but they look recoverable and will be handled in the future. Pull Request: https://projects.blender.org/blender/blender/pulls/133457
1461 lines
46 KiB
CMake
1461 lines
46 KiB
CMake
# SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
# Helper defined outside this file; judging by its name it relaxes the extra-strict
# compiler warning flags for this directory (NOTE(review): confirm against its definition).
remove_extra_strict_flags()

# Include directories: only the parent directory.
set(INC ..)

# No system include directories are listed here.
set(INC_SYS)
|
|
|
|
# Kernel entry-point sources, one list per device backend.
# All paths are relative to this directory.
set(SRC_KERNEL_DEVICE_CPU
  device/cpu/globals.cpp
  device/cpu/kernel.cpp
  device/cpu/kernel_avx2.cpp
)

set(SRC_KERNEL_DEVICE_CUDA device/cuda/kernel.cu)

set(SRC_KERNEL_DEVICE_HIP device/hip/kernel.cpp)

set(SRC_KERNEL_DEVICE_HIPRT device/hiprt/kernel.cpp)

set(SRC_KERNEL_DEVICE_METAL device/metal/kernel.metal)

set(SRC_KERNEL_DEVICE_OPTIX
  device/optix/kernel.cu
  device/optix/kernel_shader_raytrace.cu
)

if(WITH_CYCLES_OSL)
  # Pack the OSL version into a single integer: MAJOR * 10000 + MINOR * 100 + PATCH.
  math(EXPR OSL_LIBRARY_VERSION_CODE "${OSL_LIBRARY_VERSION_MAJOR} * 10000 + ${OSL_LIBRARY_VERSION_MINOR} * 100 + ${OSL_LIBRARY_VERSION_PATCH}")

  # OSL adds two more OptiX translation units.
  list(APPEND SRC_KERNEL_DEVICE_OPTIX
    osl/services_optix.cu
    device/optix/kernel_osl.cu
  )
endif()

# Kept as a single entry: this variable is later concatenated onto a directory path.
set(SRC_KERNEL_DEVICE_ONEAPI device/oneapi/kernel.cpp)
|
|
|
|
# Header lists for each device backend (paths relative to this directory).

# CPU backend.
set(SRC_KERNEL_DEVICE_CPU_HEADERS
  device/cpu/bvh.h
  device/cpu/compat.h
  device/cpu/image.h
  device/cpu/globals.h
  device/cpu/kernel.h
  device/cpu/kernel_arch.h
  device/cpu/kernel_arch_impl.h
)

# Headers under device/gpu, listed as dependencies by every GPU kernel build below.
set(SRC_KERNEL_DEVICE_GPU_HEADERS
  device/gpu/image.h
  device/gpu/kernel.h
  device/gpu/parallel_active_index.h
  device/gpu/parallel_prefix_sum.h
  device/gpu/parallel_sorted_index.h
  device/gpu/work_stealing.h
)

# CUDA backend.
set(SRC_KERNEL_DEVICE_CUDA_HEADERS
  device/cuda/compat.h
  device/cuda/config.h
  device/cuda/globals.h
)

# HIP backend.
set(SRC_KERNEL_DEVICE_HIP_HEADERS
  device/hip/compat.h
  device/hip/config.h
  device/hip/globals.h
)

# HIP-RT backend.
set(SRC_KERNEL_DEVICE_HIPRT_HEADERS
  device/hiprt/bvh.h
  device/hiprt/common.h
  device/hiprt/globals.h
  device/hiprt/hiprt_kernels.h
)

# HIP-RT SDK implementation headers.
set(SRC_KERNEL_DEVICE_HIPRT_SDK
  hiprt/impl/Aabb.h
  hiprt/impl/BvhNode.h
  hiprt/impl/Geometry.h
  hiprt/impl/hiprt_device_impl.h
  hiprt/impl/hiprt_kernels_bitcode.h
  hiprt/impl/Instance.h
  hiprt/impl/Math.h
  hiprt/impl/QrDecomposition.h
  hiprt/impl/Quaternion.h
  hiprt/impl/Scene.h
  hiprt/impl/Transform.h
  hiprt/impl/Triangle.h
)

# HIP-RT SDK top-level headers.
set(SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS
  hiprt/hiprt_common.h
  hiprt/hiprt_device.h
  hiprt/hiprt_types.h
  hiprt/hiprt_vec.h
)

# OptiX backend.
set(SRC_KERNEL_DEVICE_OPTIX_HEADERS
  device/optix/bvh.h
  device/optix/compat.h
  device/optix/globals.h
)

# Metal backend.
set(SRC_KERNEL_DEVICE_METAL_HEADERS
  device/metal/bvh.h
  device/metal/compat.h
  device/metal/context_begin.h
  device/metal/context_end.h
  device/metal/function_constants.h
  device/metal/globals.h
)

# oneAPI backend. Note that this list also contains the CPU backend's bvh.h.
set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS
  device/oneapi/compat.h
  device/oneapi/context_begin.h
  device/oneapi/context_end.h
  device/oneapi/context_intersect_begin.h
  device/oneapi/context_intersect_end.h
  device/oneapi/globals.h
  device/oneapi/kernel.h
  device/oneapi/kernel_templates.h
  device/cpu/bvh.h
)
|
|
|
|
# Headers in closure/ (entry order is kept as-is).
set(SRC_KERNEL_CLOSURE_HEADERS
  closure/alloc.h
  closure/bsdf.h
  closure/bsdf_ashikhmin_velvet.h
  closure/bsdf_burley.h
  closure/bsdf_diffuse.h
  closure/bsdf_diffuse_ramp.h
  closure/bsdf_microfacet.h
  closure/bsdf_oren_nayar.h
  closure/bsdf_phong_ramp.h
  closure/bsdf_ray_portal.h
  closure/bsdf_sheen.h
  closure/bsdf_toon.h
  closure/bsdf_transparent.h
  closure/bsdf_util.h
  closure/bsdf_ashikhmin_shirley.h
  closure/bsdf_hair.h
  closure/bssrdf.h
  closure/emissive.h
  closure/volume.h
  closure/volume_util.h
  closure/volume_henyey_greenstein.h
  closure/volume_rayleigh.h
  closure/volume_fournier_forand.h
  closure/volume_draine.h
  closure/bsdf_principled_hair_chiang.h
  closure/bsdf_principled_hair_huang.h
)
|
|
|
|
# Headers in svm/ (entry order is kept as-is).
set(SRC_KERNEL_SVM_HEADERS
  svm/svm.h
  svm/ao.h
  svm/aov.h
  svm/attribute.h
  svm/bevel.h
  svm/blackbody.h
  svm/bump.h
  svm/camera.h
  svm/clamp.h
  svm/closure.h
  svm/convert.h
  svm/checker.h
  svm/color_util.h
  svm/brick.h
  svm/displace.h
  svm/fresnel.h
  svm/wireframe.h
  svm/wavelength.h
  svm/gabor.h
  svm/gamma.h
  svm/brightness.h
  svm/geometry.h
  svm/gradient.h
  svm/hsv.h
  svm/ies.h
  svm/image.h
  svm/invert.h
  svm/light_path.h
  svm/magic.h
  svm/map_range.h
  svm/mapping.h
  svm/mapping_util.h
  svm/math.h
  svm/math_util.h
  svm/mix.h
  svm/node_types_template.h
  svm/noise.h
  svm/noisetex.h
  svm/normal.h
  svm/ramp.h
  svm/ramp_util.h
  svm/sepcomb_color.h
  svm/sepcomb_hsv.h
  svm/sepcomb_vector.h
  svm/sky.h
  svm/tex_coord.h
  svm/fractal_noise.h
  svm/types.h
  svm/util.h
  svm/value.h
  svm/vector_rotate.h
  svm/vector_transform.h
  svm/voronoi.h
  svm/voxel.h
  svm/wave.h
  svm/white_noise.h
  svm/vertex_color.h
)
|
|
|
|
# Headers in osl/. The list is only defined when OSL is enabled; later
# `${SRC_KERNEL_OSL_HEADERS}` expansions contribute nothing otherwise.
if(WITH_CYCLES_OSL)
  set(SRC_KERNEL_OSL_HEADERS
    osl/osl.h
    osl/closures_setup.h
    osl/closures_template.h
    osl/services_gpu.h
    osl/types.h
  )
endif()
|
|
|
|
# Headers in geom/.
set(SRC_KERNEL_GEOM_HEADERS
  geom/attribute.h
  geom/curve.h
  geom/curve_intersect.h
  geom/motion_curve.h
  geom/motion_point.h
  geom/motion_triangle.h
  geom/motion_triangle_intersect.h
  geom/motion_triangle_shader.h
  geom/object.h
  geom/patch.h
  geom/point.h
  geom/point_intersect.h
  geom/primitive.h
  geom/shader_data.h
  geom/subd_triangle.h
  geom/triangle.h
  geom/triangle_intersect.h
  geom/volume.h
)

# Headers in bake/.
set(SRC_KERNEL_BAKE_HEADERS bake/bake.h)

# Headers in bvh/.
set(SRC_KERNEL_BVH_HEADERS
  bvh/bvh.h
  bvh/nodes.h
  bvh/shadow_all.h
  bvh/local.h
  bvh/traversal.h
  bvh/types.h
  bvh/util.h
  bvh/volume.h
  bvh/volume_all.h
)

# Headers in camera/.
set(SRC_KERNEL_CAMERA_HEADERS
  camera/camera.h
  camera/projection.h
)

# Headers in film/.
set(SRC_KERNEL_FILM_HEADERS
  film/adaptive_sampling.h
  film/aov_passes.h
  film/data_passes.h
  film/denoising_passes.h
  film/cryptomatte_passes.h
  film/light_passes.h
  film/read.h
  film/write.h
)
|
|
|
|
# Headers in integrator/.
set(SRC_KERNEL_INTEGRATOR_HEADERS
  integrator/displacement_shader.h
  integrator/init_from_bake.h
  integrator/init_from_camera.h
  integrator/intersect_dedicated_light.h
  integrator/intersect_closest.h
  integrator/intersect_shadow.h
  integrator/intersect_subsurface.h
  integrator/intersect_volume_stack.h
  integrator/guiding.h
  integrator/megakernel.h
  integrator/mnee.h
  integrator/path_state.h
  integrator/shade_background.h
  integrator/shade_light.h
  integrator/shade_shadow.h
  integrator/shade_surface.h
  integrator/shade_volume.h
  integrator/shade_dedicated_light.h
  integrator/shadow_catcher.h
  integrator/shadow_linking.h
  integrator/shadow_state_template.h
  integrator/state_flow.h
  integrator/state.h
  integrator/state_template.h
  integrator/state_util.h
  integrator/subsurface_disk.h
  integrator/subsurface.h
  integrator/subsurface_random_walk.h
  integrator/surface_shader.h
  integrator/volume_shader.h
  integrator/volume_stack.h
)

# Headers in light/.
set(SRC_KERNEL_LIGHT_HEADERS
  light/area.h
  light/background.h
  light/common.h
  light/distant.h
  light/distribution.h
  light/light.h
  light/point.h
  light/sample.h
  light/spot.h
  light/tree.h
  light/triangle.h
)

# Headers in sample/.
set(SRC_KERNEL_SAMPLE_HEADERS
  sample/lcg.h
  sample/mapping.h
  sample/mis.h
  sample/pattern.h
  sample/sobol_burley.h
  sample/tabulated_sobol.h
  sample/util.h
)

# Headers in util/ (inside this directory, distinct from ../util).
set(SRC_KERNEL_UTIL_HEADERS
  util/colorspace.h
  util/differential.h
  util/ies.h
  util/lookup_table.h
  util/nanovdb.h
  util/profiler.h
)

# Headers that live directly in this directory.
set(SRC_KERNEL_TYPES_HEADERS
  data_arrays.h
  data_template.h
  globals.h
  image.h
  tables.h
  types.h
)
|
|
|
|
# Aggregate of all device-independent kernel header lists. The per-device kernel
# builds below list it among their dependencies.
set(SRC_KERNEL_HEADERS
  ${SRC_KERNEL_BAKE_HEADERS}
  ${SRC_KERNEL_BVH_HEADERS}
  ${SRC_KERNEL_CAMERA_HEADERS}
  ${SRC_KERNEL_CLOSURE_HEADERS}
  ${SRC_KERNEL_FILM_HEADERS}
  ${SRC_KERNEL_GEOM_HEADERS}
  ${SRC_KERNEL_INTEGRATOR_HEADERS}
  ${SRC_KERNEL_LIGHT_HEADERS}
  ${SRC_KERNEL_OSL_HEADERS}
  ${SRC_KERNEL_SAMPLE_HEADERS}
  ${SRC_KERNEL_SVM_HEADERS}
  ${SRC_KERNEL_TYPES_HEADERS}
  ${SRC_KERNEL_UTIL_HEADERS}
)
|
|
|
|
# Headers from the sibling ../util directory that the kernel builds depend on.
set(SRC_UTIL_HEADERS
  ../util/atomic.h
  ../util/color.h
  ../util/defines.h
  ../util/half.h
  ../util/hash.h
  ../util/math.h
  ../util/math_base.h
  ../util/math_fast.h
  ../util/math_intersect.h
  ../util/math_float2.h
  ../util/math_float3.h
  ../util/math_float4.h
  ../util/math_float8.h
  ../util/math_int2.h
  ../util/math_int3.h
  ../util/math_int4.h
  ../util/math_int8.h
  ../util/projection.h
  ../util/projection_inverse.h
  ../util/rect.h
  ../util/static_assert.h
  ../util/transform.h
  ../util/texture.h
  ../util/types.h
  ../util/types_base.h
  ../util/types_float2.h
  ../util/types_float3.h
  ../util/types_float4.h
  ../util/types_float8.h
  ../util/types_int2.h
  ../util/types_int3.h
  ../util/types_int4.h
  ../util/types_int8.h
  ../util/types_spectrum.h
  ../util/types_uchar2.h
  ../util/types_uchar3.h
  ../util/types_uchar4.h
  ../util/types_uint2.h
  ../util/types_uint3.h
  ../util/types_uint4.h
  ../util/types_ushort4.h
)
|
|
|
|
# No libraries are listed here.
set(LIB)

# `Zstd` compressor for kernels.
# Host tool that the kernel build rules below run (via `$<TARGET_FILE:zstd_compress>`)
# to produce the `.zst` files.
add_executable(zstd_compress ../cmake/zstd_compress.cpp)
target_include_directories(zstd_compress SYSTEM PRIVATE ${ZSTD_INCLUDE_DIRS})

# Use the keyword signature consistently for this target: an executable has no
# consumers, so PRIVATE is the right scope. Both calls must use a keyword, since
# CMake rejects mixing the keyword and plain signatures on one target.
target_link_libraries(zstd_compress PRIVATE ${ZSTD_LIBRARIES})
if(DEFINED PTHREADS_LIBRARIES)
  target_link_libraries(zstd_compress PRIVATE ${PTHREADS_LIBRARIES})
endif()
|
|
|
|
# Choose where the kernel runtime libraries get installed.
if(WITH_BLENDER)
  # Inside Blender the libraries go next to the Blender executable.
  #
  # The installation path used by delayed_install is relative to the versioned
  # folder, so step one level up.
  set(cycles_kernel_runtime_lib_target_path "../")
else()
  # Standalone Cycles keeps the libraries next to the Cycles application.
  set(cycles_kernel_runtime_lib_target_path ${CYCLES_INSTALL_PATH})
endif()

# On UNIX platforms other than Apple, a `lib` sub-directory is appended.
if(UNIX AND NOT APPLE)
  set(cycles_kernel_runtime_lib_target_path ${cycles_kernel_runtime_lib_target_path}/lib)
endif()
|
|
|
|
# CUDA module
|
|
|
|
if(WITH_CYCLES_CUDA_BINARIES)
  # 64 bit only
  set(CUDA_BITS 64)

  # CUDA version, parsed from the "release <major>.<minor>" text that
  # `nvcc --version` prints. CUDA_VERSION is the two numbers concatenated,
  # e.g. 10.2 -> "102", 12.8 -> "128".
  execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${NVCC_OUT}")
  string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${NVCC_OUT}")
  set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")

  # Warn for versions other than the tested ones.
  if(NOT ((CUDA_VERSION STREQUAL "101") OR
          (CUDA_VERSION STREQUAL "102") OR
          (CUDA_VERSION_MAJOR STREQUAL "11") OR
          (CUDA_VERSION_MAJOR STREQUAL "12")))
    message(WARNING
      "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, "
      "build may succeed but only CUDA 12, 11, 10.2 and 10.1 have been tested")
  endif()

  # Files every CUDA kernel build depends on.
  set(cuda_sources device/cuda/kernel.cu
    ${SRC_KERNEL_HEADERS}
    ${SRC_KERNEL_DEVICE_GPU_HEADERS}
    ${SRC_KERNEL_DEVICE_CUDA_HEADERS}
    ${SRC_UTIL_HEADERS}
  )
  # Collects the compressed outputs registered by the macro below.
  set(cuda_cubins)

  # Add build rules that compile `device/cuda/<name>.cu` for one architecture and
  # compress the result with zstd_compress.
  #
  #   arch         - value passed to nvcc as `-arch=`; `compute_*` yields PTX,
  #                  anything else a cubin.
  #   prev_arch    - "none", or an architecture whose output file is added as an
  #                  extra dependency of this one.
  #   name         - kernel base name, used for the source and output file names.
  #   flags        - extra nvcc flags placed before the default ones.
  #   sources      - files the compile rule depends on.
  #   experimental - not used by the macro body.
  #
  # Reads `cuda_nvcc_executable` and `cuda_version` from the caller and appends
  # the compressed output to `cuda_cubins`. This is a macro, so every variable it
  # sets is visible to the caller.
  macro(CYCLES_CUDA_KERNEL_ADD arch prev_arch name flags sources experimental)
    # Macro arguments are substituted as text; quote them so the value is
    # compared as a string and never treated as a variable name.
    if("${arch}" MATCHES "compute_.*")
      set(format "ptx")
    else()
      set(format "cubin")
    endif()
    set(cuda_file ${name}_${arch}.${format})
    set(cuda_file_compressed ${cuda_file}.zst)

    set(kernel_sources ${sources})
    if(NOT "${prev_arch}" STREQUAL "none")
      if("${prev_arch}" MATCHES "compute_.*")
        list(APPEND kernel_sources ${name}_${prev_arch}.ptx)
      else()
        list(APPEND kernel_sources ${name}_${prev_arch}.cubin)
      endif()
    endif()

    set(cuda_kernel_src "/device/cuda/${name}.cu")

    set(cuda_flags ${flags}
      -D CCL_NAMESPACE_BEGIN=
      -D CCL_NAMESPACE_END=
      -D NVCC
      -m ${CUDA_BITS}
      -I ${CMAKE_CURRENT_SOURCE_DIR}/..
      -I ${CMAKE_CURRENT_SOURCE_DIR}/device/cuda
      --use_fast_math
      -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}
      -Wno-deprecated-gpu-targets)

    if(CUDA_HOST_COMPILER)
      list(APPEND cuda_flags
        -ccbin="${CUDA_HOST_COMPILER}")
    endif()

    if(WITH_NANOVDB)
      list(APPEND cuda_flags
        -D WITH_NANOVDB)
    endif()

    if(WITH_CYCLES_DEBUG)
      list(APPEND cuda_flags -D WITH_CYCLES_DEBUG)
      list(APPEND cuda_flags --ptxas-options="-v")
    endif()

    # Quoted so that an unset `cuda_version` evaluates to false instead of
    # producing a malformed condition.
    if("${cuda_version}" GREATER_EQUAL 110)
      # Helps with compatibility when using recent clang host compiler.
      list(APPEND cuda_flags -std=c++17)
    endif()

    set(_cuda_nvcc_args
      -arch=${arch}
      ${CUDA_NVCC_FLAGS}
      --${format}
      ${CMAKE_CURRENT_SOURCE_DIR}${cuda_kernel_src}
      ${cuda_flags}
    )

    # Prefix the compiler invocation with ccache when it is enabled and available.
    if(WITH_COMPILER_CCACHE AND CCACHE_PROGRAM)
      set(_cuda_nvcc_launcher ${CCACHE_PROGRAM})
    else()
      set(_cuda_nvcc_launcher)
    endif()

    add_custom_command(
      OUTPUT ${cuda_file}
      COMMAND ${_cuda_nvcc_launcher} ${cuda_nvcc_executable} ${_cuda_nvcc_args}
      DEPENDS ${kernel_sources})

    add_custom_command(
      OUTPUT ${cuda_file_compressed}
      COMMAND "$<TARGET_FILE:zstd_compress>" ${cuda_file} ${cuda_file_compressed}
      DEPENDS ${cuda_file})

    unset(_cuda_nvcc_launcher)
    unset(_cuda_nvcc_args)
    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
    list(APPEND cuda_cubins ${cuda_file_compressed})
  endmacro()

  # Build for each requested architecture. Each branch either selects the nvcc to
  # use (leaving `cuda_nvcc_executable` / `cuda_toolkit_root_dir` defined) or
  # reports why the architecture is skipped.
  set(prev_arch "none")
  foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
    if("${arch}" MATCHES ".*_2.")
      message(STATUS "CUDA binaries for ${arch} are no longer supported, skipped.")
    elseif("${arch}" MATCHES ".*_30")
      if(DEFINED CUDA10_NVCC_EXECUTABLE)
        set(cuda_nvcc_executable ${CUDA10_NVCC_EXECUTABLE})
        set(cuda_toolkit_root_dir ${CUDA10_TOOLKIT_ROOT_DIR})
        set(cuda_version 100)
      elseif("${CUDA_VERSION}" LESS 110) # Support for sm_30 was removed in CUDA 11
        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
        set(cuda_version ${CUDA_VERSION})
      else()
        message(STATUS "CUDA binaries for ${arch} require CUDA 10 or earlier, skipped.")
      endif()
    elseif("${arch}" MATCHES ".*_3.")
      if(DEFINED CUDA11_NVCC_EXECUTABLE)
        set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE})
        set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR})
        set(cuda_version 110)
      elseif("${CUDA_VERSION}" LESS 120) # Support for sm_35, sm_37 was removed in CUDA 12
        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
        set(cuda_version ${CUDA_VERSION})
      else()
        message(STATUS "CUDA binaries for ${arch} require CUDA 11 or earlier, skipped.")
      endif()
    elseif("${arch}" MATCHES ".*_7." AND "${CUDA_VERSION}" LESS 100)
      message(STATUS "CUDA binaries for ${arch} require CUDA 10.0+, skipped.")
    elseif("${arch}" MATCHES ".*_8.")
      if("${CUDA_VERSION}" GREATER_EQUAL 111) # Support for sm_86 was introduced in CUDA 11.1
        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
        set(cuda_version ${CUDA_VERSION})
      elseif(DEFINED CUDA11_NVCC_EXECUTABLE)
        set(cuda_nvcc_executable ${CUDA11_NVCC_EXECUTABLE})
        set(cuda_toolkit_root_dir ${CUDA11_TOOLKIT_ROOT_DIR})
        set(cuda_version 110)
      else()
        message(STATUS "CUDA binaries for ${arch} require CUDA 11.1+, skipped.")
      endif()
    elseif("${arch}" MATCHES ".*_10." OR "${arch}" MATCHES ".*_120")
      if("${CUDA_VERSION}" GREATER_EQUAL 128) # Support for sm_100, sm_101, sm_120 was introduced in CUDA 12.8
        set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
        set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
        set(cuda_version ${CUDA_VERSION})
      else()
        message(STATUS "CUDA binaries for ${arch} require CUDA 12.8+, skipped.")
      endif()
    else()
      set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE})
      set(cuda_toolkit_root_dir ${CUDA_TOOLKIT_ROOT_DIR})
      set(cuda_version ${CUDA_VERSION})
    endif()

    if(DEFINED cuda_nvcc_executable AND DEFINED cuda_toolkit_root_dir)
      # Compile regular kernel
      cycles_cuda_kernel_add(${arch} ${prev_arch} kernel "" "${cuda_sources}" FALSE)

      # In serial mode each architecture depends on the previous one's output,
      # so the kernels are built one after another.
      if(WITH_CYCLES_CUDA_BUILD_SERIAL)
        set(prev_arch ${arch})
      endif()

      # Reset so a skipped architecture in a later iteration is detected.
      unset(cuda_nvcc_executable)
      unset(cuda_toolkit_root_dir)
    endif()
  endforeach()

  add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
  cycles_set_solution_folder(cycles_kernel_cuda)
endif()
|
|
|
|
# HIP module
|
|
|
|
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
  # Files every HIP kernel build depends on.
  set(hip_sources device/hip/kernel.cpp
    ${SRC_KERNEL_HEADERS}
    ${SRC_KERNEL_DEVICE_GPU_HEADERS}
    ${SRC_KERNEL_DEVICE_HIP_HEADERS}
    ${SRC_UTIL_HEADERS}
  )
  # Collects the compressed outputs registered by the macro below.
  set(hip_fatbins)

  # Add build rules that compile `device/hip/<name>.cpp` into a fatbin for one
  # architecture and compress the result with zstd_compress.
  #
  #   arch         - value passed to the compiler as `--offload-arch=`.
  #   name         - kernel base name, used for the source and output file names.
  #   flags        - extra compiler flags.
  #   sources      - files the compile rule depends on.
  #   experimental - not used by the macro body.
  #
  # Appends the compressed output to `hip_fatbins`. This is a macro, so every
  # variable it sets is visible to the caller.
  macro(CYCLES_HIP_KERNEL_ADD arch name flags sources experimental)
    set(format "fatbin")
    set(hip_file ${name}_${arch}.${format})
    set(hip_file_compressed ${hip_file}.zst)
    set(kernel_sources ${sources})

    set(hip_kernel_src "/device/hip/${name}.cpp")

    if(WIN32)
      # On Windows the compiler's .bat wrapper is run through `cmake -E env`
      # with HIP_PATH set.
      set(hip_command ${CMAKE_COMMAND})
      set(hip_flags
        -E env "HIP_PATH=${HIP_ROOT_DIR}"
        ${HIP_HIPCC_EXECUTABLE}.bat)
    else()
      set(hip_command ${HIP_HIPCC_EXECUTABLE})
      set(hip_flags)
    endif()

    # There's a few bugs in the HIP compiler causing some scenes to fail to render,
    # or render improperly with specific combinations of device and/or compiler.
    # - Vega (gfx90x) fails to render some scenes
    # - Other GPUs (E.g. RDNA3) render volumes improperly with HIP 6.1.40252
    # A workaround is to set -O1 opt level during kernel compilation for these
    # configurations.
    #
    # Both operands are quoted: an empty HIP_VERSION would otherwise leave
    # STREQUAL without a left-hand side and abort configuration.
    if(WIN32 AND (("${arch}" MATCHES "gfx90[a-z0-9]+") OR ("${HIP_VERSION}" STREQUAL "6.1.40252")))
      set(hip_opt_flags "-O1")
    else()
      set(hip_opt_flags)
    endif()

    set(hip_flags
      ${hip_flags}
      --offload-arch=${arch}
      ${HIP_HIPCC_FLAGS}
      --genco
      ${CMAKE_CURRENT_SOURCE_DIR}${hip_kernel_src}
      ${flags}
      -D CCL_NAMESPACE_BEGIN=
      -D CCL_NAMESPACE_END=
      -D HIPCC
      -I ${CMAKE_CURRENT_SOURCE_DIR}/..
      -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hip
      -Wno-parentheses-equality
      -Wno-unused-value
      -ffast-math
      ${hip_opt_flags}
      -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})

    if(WITH_NANOVDB)
      list(APPEND hip_flags
        -D WITH_NANOVDB)
    endif()

    if(WITH_CYCLES_DEBUG)
      list(APPEND hip_flags -D WITH_CYCLES_DEBUG)
    endif()

    add_custom_command(
      OUTPUT ${hip_file}
      COMMAND ${hip_command} ${hip_flags}
      DEPENDS ${kernel_sources})
    add_custom_command(
      OUTPUT ${hip_file_compressed}
      COMMAND "$<TARGET_FILE:zstd_compress>" ${hip_file} ${hip_file_compressed}
      DEPENDS ${hip_file})
    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)
    list(APPEND hip_fatbins ${hip_file_compressed})
  endmacro()

  # Build for each requested architecture.
  foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
    # Compile regular kernel
    cycles_hip_kernel_add(${arch} kernel "" "${hip_sources}" FALSE)
  endforeach()

  add_custom_target(cycles_kernel_hip ALL DEPENDS ${hip_fatbins})
  cycles_set_solution_folder(cycles_kernel_hip)
endif()
|
|
|
|
# HIP RT module
|
|
|
|
if(WITH_CYCLES_DEVICE_HIPRT)
  set(HIPRT_COMPILER_PARALLEL_JOBS 1 CACHE STRING "Number of parallel compiler instances to use for HIP-RT kernels")
  mark_as_advanced(HIPRT_COMPILER_PARALLEL_JOBS)

  # Outputs of the two HIP-RT SDK kernel builds below.
  set(bvh_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd.hipfb)
  set(bvh_file_oro ${CMAKE_CURRENT_BINARY_DIR}/oro_compiled_kernels.hipfb)

  if(WIN32)
    # On Windows the compiler's .bat wrapper is run through `cmake -E env`
    # with HIP_PATH set.
    set(hiprt_compile_command ${CMAKE_COMMAND})
    set(hiprt_compile_flags
      -E env "HIP_PATH=${HIP_ROOT_DIR}"
      ${HIP_HIPCC_EXECUTABLE}.bat)
  else()
    set(hiprt_compile_command ${HIP_HIPCC_EXECUTABLE})
    set(hiprt_compile_flags)
  endif()

  # One `--offload-arch=` option per requested architecture; every compile and
  # link step below targets all of them at once.
  set(target_gpus)
  foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
    list(APPEND target_gpus "--offload-arch=${arch}")
  endforeach()

  if(WITH_NANOVDB)
    list(APPEND hiprt_compile_flags -D WITH_NANOVDB)
  endif()

  if(WITH_CYCLES_DEBUG)
    list(APPEND hiprt_compile_flags -D WITH_CYCLES_DEBUG)
  endif()

  # HIP_VERSION is quoted: if it were empty, the unquoted form would leave
  # STREQUAL without a left-hand side and abort configuration.
  if(WIN32 AND ("${HIP_VERSION}" STREQUAL "6.1.40252"))
    message(WARNING "HIP SDK ${HIP_VERSION} has known rendering artifacts with HIPRT. 5.7 is recommended instead")
  endif()

  # NOTE(review): `${flags}` in the flag lists below is not set anywhere in the
  # visible part of this file (macro parameters named `flags` are not variables),
  # so it presumably expands to nothing unless defined by an including scope.
  set(hiprt_compile_flags_bvh
    ${hiprt_compile_flags}
    ${target_gpus}
    ${HIP_HIPCC_FLAGS}
    -x hip
    ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h
    ${flags}
    -D HIPRT_BITCODE_LINKING
    -std=c++17
    -mllvm
    -amdgpu-early-inline-all=false
    -mllvm
    -amdgpu-function-calls=true
    -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
    --genco
    -I ${HIPRT_INCLUDE_DIR}
    -Wno-parentheses-equality
    -Wno-unused-value
    -ffast-math
    -o ${bvh_file})

  set(hiprt_compile_flags_bvh_oro
    ${hiprt_compile_flags}
    ${target_gpus}
    ${HIP_HIPCC_FLAGS}
    -x hip
    ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h
    ${flags}
    -D HIPRT_BITCODE_LINKING
    -std=c++17
    -mllvm
    -amdgpu-early-inline-all=false
    -mllvm
    -amdgpu-function-calls=true
    -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
    --genco
    -I ${HIPRT_INCLUDE_DIR}/contrib/Orochi
    -include hip/hip_runtime.h
    -Wno-parentheses-equality
    -Wno-unused-value
    -ffast-math
    -o ${bvh_file_oro})

  add_custom_command(
    OUTPUT ${bvh_file}
    COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh}
    DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels.h)

  add_custom_command(
    OUTPUT ${bvh_file_oro}
    COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_bvh_oro}
    DEPENDS ${HIPRT_INCLUDE_DIR}/contrib/Orochi/ParallelPrimitives/RadixSortKernels.h)

  delayed_install("" "${bvh_file}" ${cycles_kernel_runtime_lib_target_path})
  delayed_install("" "${bvh_file_oro}" ${cycles_kernel_runtime_lib_target_path})

  # Precompiled Cycles HIP-RT kernel: the Cycles kernel and the HIP-RT SDK are
  # each compiled to bitcode, linked into one .hipfb and then compressed.
  if(WITH_CYCLES_HIP_BINARIES)
    set(hiprt_sources device/hiprt/kernel.cpp
      ${SRC_KERNEL_HEADERS}
      ${SRC_KERNEL_DEVICE_GPU_HEADERS}
      ${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
      ${SRC_UTIL_HEADERS}
    )

    set(cycles_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc)
    set(sdk_bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/hiprt${HIPRT_VERSION}_${HIP_VERSION_SHORT}_amd_lib.bc)
    set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb)
    set(hiprt_file_compressed ${hiprt_file}.zst)
    set(kernel_sources ${hiprt_sources})
    set(hiprt_kernel_src "/device/hiprt/kernel.cpp")

    set(hiprt_compile_flags_sdk_bc
      ${hiprt_compile_flags}
      ${target_gpus}
      ${HIP_HIPCC_FLAGS}
      ${flags}
      -x hip
      ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h
      -D HIPRT_BITCODE_LINKING
      -std=c++17
      -fgpu-rdc
      -c
      --gpu-bundle-output
      -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
      -emit-llvm
      -I ${HIPRT_INCLUDE_DIR}
      -Wno-parentheses-equality
      -Wno-unused-value
      -ffast-math
      -o ${sdk_bitcode_file})

    set(hiprt_compile_flags_cycles_bc
      ${hiprt_compile_flags}
      ${target_gpus}
      ${HIP_HIPCC_FLAGS}
      ${CMAKE_CURRENT_SOURCE_DIR}${hiprt_kernel_src}
      ${flags}
      -D CCL_NAMESPACE_BEGIN=
      -D CCL_NAMESPACE_END=
      -D HIPCC
      -D __HIPRT__
      -std=c++17
      -fgpu-rdc
      -c
      --gpu-bundle-output
      -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
      -emit-llvm
      -I ${CMAKE_CURRENT_SOURCE_DIR}/..
      -I ${CMAKE_CURRENT_SOURCE_DIR}/device/hiprt
      -I ${HIPRT_INCLUDE_DIR}
      -Wno-parentheses-equality
      -Wno-unused-value
      -ffast-math
      -o ${cycles_bitcode_file}
    )

    add_custom_command(
      OUTPUT ${cycles_bitcode_file}
      COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_cycles_bc}
      DEPENDS ${kernel_sources}
    )

    add_custom_command(
      OUTPUT ${sdk_bitcode_file}
      COMMAND ${hiprt_compile_command} ${hiprt_compile_flags_sdk_bc}
      DEPENDS ${HIPRT_INCLUDE_DIR}/hiprt/impl/hiprt_kernels_bitcode.h
    )

    if(WIN32)
      # Same `cmake -E env` wrapping as for the compiler, applied to the linker.
      set(hiprt_link_command ${CMAKE_COMMAND})
      set(hiprt_link_flags -E env "HIP_PATH=${HIP_ROOT_DIR}"
        ${HIP_LINKER_EXECUTABLE}
      )
    else()
      set(hiprt_link_command ${HIP_LINKER_EXECUTABLE})
      set(hiprt_link_flags)
    endif()

    set(hiprt_link_flags
      ${hiprt_link_flags}
      ${target_gpus}
      -fgpu-rdc
      --hip-link
      --cuda-device-only
      -parallel-jobs=${HIPRT_COMPILER_PARALLEL_JOBS}
      ${cycles_bitcode_file}
      ${sdk_bitcode_file}
      -o ${hiprt_file}
    )

    add_custom_command(
      OUTPUT ${hiprt_file}
      COMMAND ${hiprt_link_command} ${hiprt_link_flags}
      DEPENDS ${cycles_bitcode_file} ${sdk_bitcode_file}
    )

    add_custom_command(
      OUTPUT ${hiprt_file_compressed}
      COMMAND "$<TARGET_FILE:zstd_compress>" ${hiprt_file} ${hiprt_file_compressed}
      DEPENDS ${hiprt_file}
    )
    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib)

  endif()

  # `hiprt_file_compressed` is only set when WITH_CYCLES_HIP_BINARIES is on;
  # otherwise it contributes nothing to the dependency list.
  add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed} ${bvh_file} ${bvh_file_oro})
  cycles_set_solution_folder(cycles_kernel_hiprt)
endif()
|
|
|
|
# OptiX PTX modules
|
|
|
|
if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES)
  # Collects the compressed PTX outputs registered by the macro below. Reset it
  # first, like `cuda_cubins` and `hip_fatbins`, so no earlier value of the same
  # name ends up in the target's dependencies.
  set(optix_ptx)

  # Add build rules that compile `input` to `<name>.ptx` in the binary directory
  # and compress it with zstd_compress.
  #
  #   name  - base name of the output file.
  #   input - source file, relative to this directory (nvcc runs from here).
  #   flags - extra nvcc flags placed before the default ones.
  #
  # Appends the compressed output to `optix_ptx`. This is a macro, so every
  # variable it sets is visible to the caller.
  macro(cycles_optix_kernel_add name input flags)
    set(output "${CMAKE_CURRENT_BINARY_DIR}/${name}.ptx")
    set(output_compressed "${output}.zst")

    set(cuda_flags ${flags}
      -I "${OPTIX_INCLUDE_DIR}"
      -I "${CMAKE_CURRENT_SOURCE_DIR}/.."
      -I "${CMAKE_CURRENT_SOURCE_DIR}/device/cuda"
      --use_fast_math
      -Wno-deprecated-gpu-targets
      -o ${output})

    if(CUDA_HOST_COMPILER)
      list(APPEND cuda_flags
        -ccbin="${CUDA_HOST_COMPILER}")
    endif()

    if(WITH_NANOVDB)
      list(APPEND cuda_flags
        -D WITH_NANOVDB)
    endif()

    if(WITH_CYCLES_OSL)
      list(APPEND cuda_flags
        -D OSL_LIBRARY_VERSION_CODE=${OSL_LIBRARY_VERSION_CODE})
    endif()

    if(WITH_CYCLES_DEBUG)
      list(APPEND cuda_flags -D WITH_CYCLES_DEBUG)
    endif()

    add_custom_command(
      OUTPUT
        ${output}
      DEPENDS
        ${input}
        ${SRC_KERNEL_HEADERS}
        ${SRC_KERNEL_DEVICE_GPU_HEADERS}
        ${SRC_KERNEL_DEVICE_CUDA_HEADERS}
        ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
        ${SRC_UTIL_HEADERS}
      COMMAND
        ${CUDA_NVCC_EXECUTABLE}
        --ptx
        -arch=sm_50
        ${cuda_flags}
        ${input}
      WORKING_DIRECTORY
        "${CMAKE_CURRENT_SOURCE_DIR}"
    )

    add_custom_command(
      OUTPUT ${output_compressed}
      COMMAND "$<TARGET_FILE:zstd_compress>" ${output} ${output_compressed}
      DEPENDS ${output})

    list(APPEND optix_ptx ${output_compressed})

    delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output_compressed}" ${CYCLES_INSTALL_PATH}/lib)
  endmacro()

  cycles_optix_kernel_add(
    kernel_optix
    "device/optix/kernel.cu"
    "")
  cycles_optix_kernel_add(
    kernel_optix_shader_raytrace
    "device/optix/kernel_shader_raytrace.cu"
    "--keep-device-functions")
  if(WITH_CYCLES_OSL)
    cycles_optix_kernel_add(
      kernel_optix_osl
      "device/optix/kernel_osl.cu"
      "--relocatable-device-code=true")
    cycles_optix_kernel_add(
      kernel_optix_osl_services
      "osl/services_optix.cu"
      "--relocatable-device-code=true")
  endif()

  add_custom_target(cycles_kernel_optix ALL DEPENDS ${optix_ptx})
  cycles_set_solution_folder(cycles_kernel_optix)
endif()
|
|
|
|
# oneAPI module
|
|
|
|
if(WITH_CYCLES_DEVICE_ONEAPI)
|
|
if(WITH_CYCLES_ONEAPI_BINARIES)
|
|
set(cycles_kernel_oneapi_lib_suffix "_aot")
|
|
else()
|
|
set(cycles_kernel_oneapi_lib_suffix "_jit")
|
|
endif()
|
|
|
|
if(WIN32)
|
|
set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.dll)
|
|
set(cycles_kernel_oneapi_linker_lib ${CMAKE_CURRENT_BINARY_DIR}/cycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.lib)
|
|
else()
|
|
set(cycles_kernel_oneapi_lib ${CMAKE_CURRENT_BINARY_DIR}/libcycles_kernel_oneapi${cycles_kernel_oneapi_lib_suffix}.so)
|
|
endif()
|
|
|
|
set(cycles_oneapi_kernel_sources
|
|
${SRC_KERNEL_DEVICE_ONEAPI}
|
|
${SRC_KERNEL_HEADERS}
|
|
${SRC_KERNEL_DEVICE_GPU_HEADERS}
|
|
${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
|
|
${SRC_UTIL_HEADERS}
|
|
)
|
|
|
|
set(SYCL_OFFLINE_COMPILER_PARALLEL_JOBS 1 CACHE STRING "Number of parallel compiler instances to use for device binaries compilation (expect ~8GB peak memory usage per instance).")
|
|
mark_as_advanced(SYCL_OFFLINE_COMPILER_PARALLEL_JOBS)
|
|
|
|
if(WITH_CYCLES_ONEAPI_BINARIES)
|
|
message(STATUS "${SYCL_OFFLINE_COMPILER_PARALLEL_JOBS} instance(s) of oneAPI offline compiler will be used.")
|
|
endif()
|
|
# SYCL_CPP_FLAGS is a variable that the user can set to pass extra compiler options
|
|
set(sycl_compiler_flags
|
|
${CMAKE_CURRENT_SOURCE_DIR}/${SRC_KERNEL_DEVICE_ONEAPI}
|
|
-fsycl
|
|
-fsycl-unnamed-lambda
|
|
-fdelayed-template-parsing
|
|
-mllvm -inlinedefault-threshold=250
|
|
-mllvm -inlinehint-threshold=350
|
|
-fsycl-device-code-split=per_kernel
|
|
-fsycl-max-parallel-link-jobs=${SYCL_OFFLINE_COMPILER_PARALLEL_JOBS}
|
|
--offload-compress
|
|
--offload-compression-level=19
|
|
-shared
|
|
-DWITH_ONEAPI
|
|
-O2
|
|
-fno-fast-math
|
|
-ffp-contract=fast
|
|
-fassociative-math
|
|
-freciprocal-math
|
|
-fno-signed-zeros
|
|
-ffinite-math-only
|
|
-D__KERNEL_LOCAL_ATOMIC_SORT__
|
|
-o"${cycles_kernel_oneapi_lib}"
|
|
-I"${CMAKE_CURRENT_SOURCE_DIR}/.."
|
|
${SYCL_CPP_FLAGS}
|
|
)
|
|
|
|
if(WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
|
|
list(APPEND sycl_compiler_flags -DWITH_ONEAPI_SYCL_HOST_TASK)
|
|
endif()
|
|
|
|
# Default backend options for the spir64 target, used only when the variable
# has not been defined already. The pieces concatenate into one string.
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64)
  string(CONCAT CYCLES_ONEAPI_SYCL_OPTIONS_spir64
    "-options '"
    "-cl-fast-relaxed-math"
    " -ze-opt-large-register-file"
    " -ze-opt-regular-grf-kernel integrator_intersect"
    " -ze-opt-large-grf-kernel shade"
    " -ze-opt-no-local-to-generic"
    "'"
  )
endif()

# The spir64_gen options default to the spir64 ones and are exposed as an
# advanced cache entry.
if(NOT DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
  set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen
    "${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}"
    CACHE STRING
    "Extra build options for spir64_gen target"
  )
  mark_as_advanced(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen)
endif()

# Enable `zebin`, a graphics binary format with improved compatibility.
set(CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "--format zebin ${CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen}")
|
|
|
|
# Host execution won't use GPU binaries, no need to compile them.
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
  # Platform-specific `ocloc ids` invocation; the device name is appended for
  # each query in the loop below.
  # NOTE(review): IGC_INSTALL_DIR is read here, while the only fallback visible
  # in this part of the file is assigned further down -- confirm it is set.
  if(WIN32)
    set(_ocloc_ids_command ${OCLOC_INSTALL_DIR}/ocloc.exe ids)
  else()
    set(_ocloc_ids_command
      ${CMAKE_COMMAND}
      -E env "LD_LIBRARY_PATH=${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib"
      ${OCLOC_INSTALL_DIR}/bin/ocloc ids
    )
  endif()

  # Add the list of Intel devices to build binaries for, dropping every device
  # for which the `ocloc ids` query does not return 0.
  foreach(device ${CYCLES_ONEAPI_INTEL_BINARIES_ARCH})
    execute_process(
      COMMAND ${_ocloc_ids_command} ${device}
      RESULT_VARIABLE oclocids_ret
      OUTPUT_QUIET
      ERROR_QUIET
    )
    if(NOT oclocids_ret EQUAL 0)
      list(REMOVE_ITEM CYCLES_ONEAPI_INTEL_BINARIES_ARCH ${device})
      message(STATUS "Cycles oneAPI: binaries for ${device} not supported by Intel Graphics Compiler/ocloc, skipped.")
    endif()
  endforeach()

  list(JOIN CYCLES_ONEAPI_INTEL_BINARIES_ARCH "," gen_devices_string)
  if(NOT "${gen_devices_string}" STREQUAL "")
    string(PREPEND CYCLES_ONEAPI_SYCL_OPTIONS_spir64_gen "-device ${gen_devices_string} ")
  else()
    # Don't compile spir64_gen if no device is targeted
    message(STATUS "Cycles oneAPI: skipping spir64_gen compilation as no devices are targeted.")
    list(REMOVE_ITEM CYCLES_ONEAPI_SYCL_TARGETS spir64_gen)
  endif()

  # Iterate over all targets and their options: one `-fsycl-targets` flag for
  # the whole list, then one backend-options flag per target that has options.
  list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
  list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
  foreach(target ${CYCLES_ONEAPI_SYCL_TARGETS})
    if(DEFINED CYCLES_ONEAPI_SYCL_OPTIONS_${target})
      set(_sycl_backend_options "${CYCLES_ONEAPI_SYCL_OPTIONS_${target}}")
      list(APPEND sycl_compiler_flags
        "-Xsycl-target-backend=${target} \"${_sycl_backend_options}\""
      )
    endif()
  endforeach()
else()
  # If AOT is disabled, build for spir64
  list(APPEND sycl_compiler_flags -fsycl-targets=spir64)
  list(APPEND sycl_compiler_flags
    "-Xsycl-target-backend=spir64 \"${CYCLES_ONEAPI_SYCL_OPTIONS_spir64}\""
  )
endif()
|
|
|
|
# Pass the NanoVDB build option on to the SYCL kernel as a define.
if(WITH_NANOVDB)
  list(APPEND sycl_compiler_flags -DWITH_NANOVDB)
endif()
|
|
|
|
if(WITH_CYCLES_EMBREE AND EMBREE_SYCL_SUPPORT)
  # Defines and include path for compiling the kernel against Embree.
  list(APPEND sycl_compiler_flags
    -DWITH_EMBREE -DWITH_EMBREE_GPU
    -DEMBREE_MAJOR_VERSION=${EMBREE_MAJOR_VERSION}
    -I"${EMBREE_INCLUDE_DIRS}"
  )

  if(WIN32)
    list(APPEND sycl_compiler_flags -ladvapi32.lib)
  endif()

  # Sort the Embree and TBB library lists into per-configuration lists. An
  # `optimized` or `debug` keyword (case-insensitive) applies to the single
  # entry following it; entries without a keyword go into both lists.
  set(next_library_mode "")
  foreach(library ${EMBREE_LIBRARIES} ${TBB_LIBRARIES})
    string(TOLOWER "${library}" library_lower)
    if(("${library_lower}" STREQUAL "optimized") OR
       ("${library_lower}" STREQUAL "debug"))
      # Remember the keyword for the next entry and move on.
      set(next_library_mode "${library_lower}")
      continue()
    endif()

    if(next_library_mode STREQUAL "")
      set(_library_configs optimized debug)
    else()
      set(_library_configs ${next_library_mode})
    endif()
    foreach(_library_config ${_library_configs})
      list(APPEND EMBREE_TBB_LIBRARIES_${_library_config} ${library})
    endforeach()
    set(next_library_mode "")
  endforeach()

  # The optimized list is used for every non-Debug configuration.
  foreach(_optimized_config Release RelWithDebInfo MinSizeRel)
    list(APPEND sycl_compiler_flags
      "$<$<CONFIG:${_optimized_config}>:${EMBREE_TBB_LIBRARIES_optimized}>"
    )
  endforeach()
  list(APPEND sycl_compiler_flags
    "$<$<CONFIG:Debug>:${EMBREE_TBB_LIBRARIES_debug}>"
  )
endif()
|
|
|
|
# Pass the Cycles debug option on to the SYCL kernel as a define.
if(WITH_CYCLES_DEBUG)
  list(APPEND sycl_compiler_flags -DWITH_CYCLES_DEBUG)
endif()

# Directory that contains the SYCL compiler executable.
get_filename_component(sycl_compiler_root ${SYCL_COMPILER} DIRECTORY)

if(WIN32) # Add Windows specific compiler flags.
  list(APPEND sycl_compiler_flags
    -fuse-ld=link
    -fms-extensions
    -fms-compatibility
    -D_WINDLL
    -D_MBCS
    -DWIN32
    -D_WINDOWS
    -D_CRT_NONSTDC_NO_DEPRECATE
    -D_CRT_SECURE_NO_DEPRECATE
    -DONEAPI_EXPORT
  )

  # Derive the MSVC tools directory from the redist directory. The expansions
  # are quoted so that an empty or undefined MSVC_REDIST_DIR reaches the
  # CMAKE_AR based fallback below instead of aborting configuration with an
  # argument-count error.
  string(REPLACE "/Redist/" "/Tools/" MSVC_TOOLS_DIR "${MSVC_REDIST_DIR}")
  # Version Folder between Redist and Tools can mismatch sometimes
  if(NOT EXISTS "${MSVC_TOOLS_DIR}")
    get_filename_component(cmake_ar_dir ${CMAKE_AR} DIRECTORY)
    get_filename_component(MSVC_TOOLS_DIR "${cmake_ar_dir}/../../../" ABSOLUTE)
  endif()

  # Locate the Windows Kit library directory.
  if(CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION)
    set(WINDOWS_KIT_DIR ${WINDOWS_KITS_DIR}/Lib/${CMAKE_VS_WINDOWS_TARGET_PLATFORM_VERSION})
  else() # case for Ninja on Windows
    get_filename_component(cmake_mt_dir ${CMAKE_MT} DIRECTORY)
    string(REPLACE /bin/ /Lib/ WINDOWS_KIT_DIR ${cmake_mt_dir})
    get_filename_component(WINDOWS_KIT_DIR "${WINDOWS_KIT_DIR}/../" ABSOLUTE)
  endif()

  # Library search paths for the MSVC and Windows Kit x64 libraries.
  list(APPEND sycl_compiler_flags
    -L"${MSVC_TOOLS_DIR}/lib/x64"
    -L"${WINDOWS_KIT_DIR}/um/x64"
    -L"${WINDOWS_KIT_DIR}/ucrt/x64"
  )
else() # Add Linux specific compiler flags.
  list(APPEND sycl_compiler_flags -fPIC)
  list(APPEND sycl_compiler_flags -fvisibility=hidden)

  # Add $ORIGIN to `cycles_kernel_oneapi.so` RPATH so `libsycl.so` and
  # `libpi_level_zero.so` can be placed next to it and get found.
  list(APPEND sycl_compiler_flags -Wl,-rpath,'$$ORIGIN')
endif()
|
|
|
|
# Create CONFIG specific compiler flags: each list is the common flag list
# followed by the additions for that configuration.
set(sycl_compiler_flags_Release ${sycl_compiler_flags} -DNDEBUG)
set(sycl_compiler_flags_RelWithDebInfo ${sycl_compiler_flags} -DNDEBUG -g)
set(sycl_compiler_flags_Debug ${sycl_compiler_flags} -g)
|
|
|
|
if(WIN32)
  # Extra Debug-only flags on Windows.
  list(APPEND sycl_compiler_flags_Debug
    -D_DEBUG -nostdlib
    -Xclang --dependent-lib=msvcrtd
  )

  # Build rule for the kernel library and its linker library. The flag list is
  # selected per configuration; MinSizeRel reuses the Release flags.
  add_custom_command(
    OUTPUT
      ${cycles_kernel_oneapi_lib}
      ${cycles_kernel_oneapi_linker_lib}
    COMMAND
      ${CMAKE_COMMAND} -E env
      # Set `LIB` for the compiler to find `sycl.lib` and in case of `icpx`, `libircmt.lib`.
      "LIB=${sycl_compiler_root}/../lib\;${sycl_compiler_root}/../compiler/lib/intel64_win"
      "PATH=${OCLOC_INSTALL_DIR}\;${sycl_compiler_root}"
      ${SYCL_COMPILER}
      "$<$<CONFIG:Release>:${sycl_compiler_flags_Release}>"
      "$<$<CONFIG:RelWithDebInfo>:${sycl_compiler_flags_RelWithDebInfo}>"
      "$<$<CONFIG:Debug>:${sycl_compiler_flags_Debug}>"
      "$<$<CONFIG:MinSizeRel>:${sycl_compiler_flags_Release}>"
    COMMAND_EXPAND_LISTS
    DEPENDS
      ${cycles_oneapi_kernel_sources}
      ${SYCL_COMPILER}
  )
else()
  # Fall back to the `lib/igc` directory next to the compiler when no IGC
  # install directory was provided.
  if(NOT IGC_INSTALL_DIR)
    get_filename_component(IGC_INSTALL_DIR "${sycl_compiler_root}/../lib/igc" ABSOLUTE)
  endif()

  # The following join/replace operations are to prevent cmake from
  # escaping space chars with backslashes in add_custom_command.
  foreach(_sycl_config Release RelWithDebInfo Debug)
    list(JOIN sycl_compiler_flags_${_sycl_config} " " sycl_compiler_flags_${_sycl_config}_str)
    string(REPLACE " " ";" sycl_compiler_flags_${_sycl_config}_str ${sycl_compiler_flags_${_sycl_config}_str})
  endforeach()

  # Build rule for the kernel library. The flag list is selected per
  # configuration; MinSizeRel reuses the Release flags.
  add_custom_command(
    OUTPUT
      ${cycles_kernel_oneapi_lib}
    COMMAND
      ${CMAKE_COMMAND} -E env
      "LD_LIBRARY_PATH=${sycl_compiler_root}/../lib:${OCLOC_INSTALL_DIR}/lib:${IGC_INSTALL_DIR}/lib"
      # `$ENV{PATH}` is for compiler to find `ld`.
      "PATH=${OCLOC_INSTALL_DIR}/bin:${sycl_compiler_root}:$ENV{PATH}"
      ${SYCL_COMPILER}
      "$<$<CONFIG:Release>:${sycl_compiler_flags_Release_str}>"
      "$<$<CONFIG:RelWithDebInfo>:${sycl_compiler_flags_RelWithDebInfo_str}>"
      "$<$<CONFIG:Debug>:${sycl_compiler_flags_Debug_str}>"
      "$<$<CONFIG:MinSizeRel>:${sycl_compiler_flags_Release_str}>"
    COMMAND_EXPAND_LISTS
    DEPENDS
      ${cycles_oneapi_kernel_sources}
      ${SYCL_COMPILER}
  )
endif()
|
|
|
|
# Target that builds the oneAPI kernel library as part of the default build.
add_custom_target(cycles_kernel_oneapi
  ALL
  DEPENDS ${cycles_kernel_oneapi_lib}
)

# install dynamic libraries required at runtime
delayed_install(
  ""
  "${cycles_kernel_oneapi_lib}"
  ${cycles_kernel_runtime_lib_target_path}
)
|
|
endif()
|
|
|
|
# OSL module

# With OSL enabled, add the OSL kernel library to LIB and descend into the
# OSL directories.
if(WITH_CYCLES_OSL)
  list(APPEND LIB cycles_kernel_osl)
  foreach(_osl_subdirectory osl osl/shaders)
    add_subdirectory(${_osl_subdirectory})
  endforeach()
endif()
|
|
|
|
# CPU module

include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})

# Sanitizer opt-outs for the kernel; neither branch applies unless
# WITH_COMPILER_ASAN is enabled.
if(WITH_COMPILER_ASAN AND CMAKE_COMPILER_IS_GNUCC AND (NOT WITH_CYCLES_KERNEL_ASAN))
  # GCC hangs compiling the big kernel files with ASAN and release, so disable by default.
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-sanitize=all")
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-sanitize=vptr")
elseif(WITH_COMPILER_ASAN AND CMAKE_C_COMPILER_ID MATCHES "Clang")
  # With OSL, Cycles disables RTTI in some modules, which then breaks at linking
  # when trying to use `vptr` sanitizer (included into 'undefined' general option).
  string(APPEND CMAKE_CXX_FLAGS_RELWITHDEBINFO " -fno-sanitize=vptr")
  string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-sanitize=vptr")
endif()

# Per-file compile flags for the CPU kernel translation units.
if(DEFINED CYCLES_KERNEL_FLAGS)
  set_source_files_properties(
    device/cpu/kernel.cpp
    PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}"
  )
endif()

if(CXX_HAS_AVX2)
  set_source_files_properties(
    device/cpu/kernel_avx2.cpp
    PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}"
  )
endif()

# Warnings to avoid using doubles in the kernel.
if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_C_COMPILER_ID MATCHES "Clang")
  add_check_cxx_compiler_flags(
    CMAKE_CXX_FLAGS
    _has_cxxflag_float_conversion "-Werror=float-conversion"
    _has_cxxflag_double_promotion "-Werror=double-promotion"
  )
  # The check result variables are not needed afterwards.
  foreach(_checked_flag_var _has_cxxflag_float_conversion _has_cxxflag_double_promotion)
    unset(${_checked_flag_var})
  endforeach()
endif()
|
|
|
|
# Device source files listed in the cycles_kernel library. The oneAPI source
# list is not part of it; only the oneAPI headers are listed below.
set(_cycles_kernel_device_sources
  ${SRC_KERNEL_DEVICE_CPU}
  ${SRC_KERNEL_DEVICE_CUDA}
  ${SRC_KERNEL_DEVICE_HIP}
  ${SRC_KERNEL_DEVICE_HIPRT}
  ${SRC_KERNEL_DEVICE_OPTIX}
  ${SRC_KERNEL_DEVICE_METAL}
)

# Header lists: the common kernel headers followed by the per-device ones.
set(_cycles_kernel_header_files
  ${SRC_KERNEL_HEADERS}
  ${SRC_KERNEL_DEVICE_CPU_HEADERS}
  ${SRC_KERNEL_DEVICE_GPU_HEADERS}
  ${SRC_KERNEL_DEVICE_CUDA_HEADERS}
  ${SRC_KERNEL_DEVICE_HIP_HEADERS}
  ${SRC_KERNEL_DEVICE_HIPRT_HEADERS}
  ${SRC_KERNEL_DEVICE_OPTIX_HEADERS}
  ${SRC_KERNEL_DEVICE_METAL_HEADERS}
  ${SRC_KERNEL_DEVICE_ONEAPI_HEADERS}
)

cycles_add_library(cycles_kernel "${LIB}"
  ${_cycles_kernel_device_sources}
  ${_cycles_kernel_header_files}
)
|
|
|
|
# Source groups for the kernel files. Header-only groups map the group name
# `<name>` to the list SRC_KERNEL_<NAME>_HEADERS; device groups combine the
# device's source and header lists. Calls are issued in the original order.
foreach(_kernel_group bake bvh camera closure)
  string(TOUPPER "${_kernel_group}" _kernel_group_upper)
  source_group("${_kernel_group}" FILES ${SRC_KERNEL_${_kernel_group_upper}_HEADERS})
endforeach()

foreach(_kernel_device cpu cuda)
  string(TOUPPER "${_kernel_device}" _kernel_device_upper)
  source_group("device\\${_kernel_device}" FILES
    ${SRC_KERNEL_DEVICE_${_kernel_device_upper}}
    ${SRC_KERNEL_DEVICE_${_kernel_device_upper}_HEADERS}
  )
endforeach()

# The shared GPU group only has a header list.
source_group("device\\gpu" FILES ${SRC_KERNEL_DEVICE_GPU_HEADERS})

foreach(_kernel_device hip hiprt optix metal oneapi)
  string(TOUPPER "${_kernel_device}" _kernel_device_upper)
  source_group("device\\${_kernel_device}" FILES
    ${SRC_KERNEL_DEVICE_${_kernel_device_upper}}
    ${SRC_KERNEL_DEVICE_${_kernel_device_upper}_HEADERS}
  )
endforeach()

foreach(_kernel_group film geom integrator)
  string(TOUPPER "${_kernel_group}" _kernel_group_upper)
  source_group("${_kernel_group}" FILES ${SRC_KERNEL_${_kernel_group_upper}_HEADERS})
endforeach()

# The "kernel" group is filled from the TYPES header list.
source_group("kernel" FILES ${SRC_KERNEL_TYPES_HEADERS})

foreach(_kernel_group light osl sample svm util)
  string(TOUPPER "${_kernel_group}" _kernel_group_upper)
  source_group("${_kernel_group}" FILES ${SRC_KERNEL_${_kernel_group_upper}_HEADERS})
endforeach()
|
|
|
|
# Name of the GPU kernel target registered most recently; starts out unset.
unset(_cycles_prev_gpu_kernel_target)

# Registers GPU_KERNEL_TARGET, if such a target exists, as a dependency of
# cycles_kernel and makes it depend on the previously registered GPU kernel
# target, so the registered targets form a dependency chain.
#
# This is a macro on purpose: it updates _cycles_prev_gpu_kernel_target in the
# scope of the caller.
macro(cycles_add_gpu_kernel_dependencies GPU_KERNEL_TARGET)
  if(TARGET ${GPU_KERNEL_TARGET})
    if(_cycles_prev_gpu_kernel_target)
      add_dependencies(${GPU_KERNEL_TARGET} ${_cycles_prev_gpu_kernel_target})
    endif()
    add_dependencies(cycles_kernel ${GPU_KERNEL_TARGET})
    set(_cycles_prev_gpu_kernel_target ${GPU_KERNEL_TARGET})
  endif()
endmacro()
|
|
|
|
# Register the GPU kernel targets in this order; each call is a no-op when the
# named target does not exist.
foreach(_gpu_kernel_target
    cycles_kernel_cuda
    cycles_kernel_optix
    cycles_kernel_hip
    cycles_kernel_hiprt
    cycles_kernel_oneapi
)
  cycles_add_gpu_kernel_dependencies(${_gpu_kernel_target})
endforeach()
|
|
|
|
# Install kernel source for runtime compilation

# Common destination prefix of the installed kernel sources.
set(_cycles_kernel_install_dir ${CYCLES_INSTALL_PATH}/source/kernel)

delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_BAKE_HEADERS}" ${_cycles_kernel_install_dir}/bake)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_BVH_HEADERS}" ${_cycles_kernel_install_dir}/bvh)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_CAMERA_HEADERS}" ${_cycles_kernel_install_dir}/camera)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_CLOSURE_HEADERS}" ${_cycles_kernel_install_dir}/closure)

# Device sources and headers.
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_CUDA}" ${_cycles_kernel_install_dir}/device/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_CUDA_HEADERS}" ${_cycles_kernel_install_dir}/device/cuda)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_GPU_HEADERS}" ${_cycles_kernel_install_dir}/device/gpu)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP}" ${_cycles_kernel_install_dir}/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIP_HEADERS}" ${_cycles_kernel_install_dir}/device/hip)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT}" ${_cycles_kernel_install_dir}/device/hiprt)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_HEADERS}" ${_cycles_kernel_install_dir}/device/hiprt)
# The HIPRT SDK files are taken from HIPRT_INCLUDE_DIR rather than this directory.
if(WITH_CYCLES_DEVICE_HIPRT)
  delayed_install(${HIPRT_INCLUDE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_SDK_HEADERS}" ${_cycles_kernel_install_dir}/device/hiprt/hiprt)
  delayed_install(${HIPRT_INCLUDE_DIR} "${SRC_KERNEL_DEVICE_HIPRT_SDK}" ${_cycles_kernel_install_dir}/device/hiprt/hiprt/impl)
endif()
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX}" ${_cycles_kernel_install_dir}/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_OPTIX_HEADERS}" ${_cycles_kernel_install_dir}/device/optix)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL}" ${_cycles_kernel_install_dir}/device/metal)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_DEVICE_METAL_HEADERS}" ${_cycles_kernel_install_dir}/device/metal)

# Remaining kernel header groups.
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_FILM_HEADERS}" ${_cycles_kernel_install_dir}/film)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_GEOM_HEADERS}" ${_cycles_kernel_install_dir}/geom)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_INTEGRATOR_HEADERS}" ${_cycles_kernel_install_dir}/integrator)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_LIGHT_HEADERS}" ${_cycles_kernel_install_dir}/light)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_OSL_HEADERS}" ${_cycles_kernel_install_dir}/osl)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SAMPLE_HEADERS}" ${_cycles_kernel_install_dir}/sample)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_SVM_HEADERS}" ${_cycles_kernel_install_dir}/svm)
# The TYPES headers go directly into the kernel directory.
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_TYPES_HEADERS}" ${_cycles_kernel_install_dir})
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_KERNEL_UTIL_HEADERS}" ${_cycles_kernel_install_dir}/util)
# Utility headers are installed outside the kernel directory.
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/source/util)
|