Files
test/intern/cycles/kernel/CMakeLists.txt
Brecht Van Lommel 16204bd647 Cycles: prepare to make CUDA 5.0 the official version we use
* Add CUDA compiler version detection to cmake/scons/runtime
* Remove noinline in kernel_shader.h and reenable --use_fast_math if CUDA 5.x
  is used, these were workarounds for CUDA 4.2 bugs
* Change max number of registers to 32 for sm 2.x (based on performance tests
  from Martijn Berger and confirmed here), and also for NVidia OpenCL.

Overall it seems that with these changes and the latest CUDA 5.0 download, that
performance is as good as or better than the 2.67b release with the scenes and
graphics cards I tested.
2013-06-19 17:54:23 +00:00

231 lines
6.0 KiB
CMake

set(INC
.
../util
osl
svm
)
set(INC_SYS
)
set(SRC
kernel.cpp
kernel_sse2.cpp
kernel_sse3.cpp
kernel.cl
kernel.cu
)
set(SRC_HEADERS
kernel.h
kernel_accumulate.h
kernel_bvh.h
kernel_bvh_traversal.h
kernel_camera.h
kernel_compat_cpu.h
kernel_compat_cuda.h
kernel_compat_opencl.h
kernel_curve.h
kernel_differential.h
kernel_displace.h
kernel_emission.h
kernel_film.h
kernel_globals.h
kernel_jitter.h
kernel_light.h
kernel_math.h
kernel_montecarlo.h
kernel_object.h
kernel_passes.h
kernel_path.h
kernel_primitive.h
kernel_projection.h
kernel_random.h
kernel_shader.h
kernel_subsurface.h
kernel_textures.h
kernel_triangle.h
kernel_types.h
)
set(SRC_CLOSURE_HEADERS
closure/bsdf.h
closure/bsdf_ashikhmin_velvet.h
closure/bsdf_diffuse.h
closure/bsdf_diffuse_ramp.h
closure/bsdf_microfacet.h
closure/bsdf_oren_nayar.h
closure/bsdf_phong_ramp.h
closure/bsdf_reflection.h
closure/bsdf_refraction.h
closure/bsdf_toon.h
closure/bsdf_transparent.h
closure/bsdf_util.h
closure/bsdf_ward.h
closure/bsdf_westin.h
closure/bssrdf.h
closure/emissive.h
closure/volume.h
)
set(SRC_SVM_HEADERS
svm/svm.h
svm/svm_attribute.h
svm/svm_camera.h
svm/svm_closure.h
svm/svm_convert.h
svm/svm_checker.h
svm/svm_brick.h
svm/svm_displace.h
svm/svm_fresnel.h
svm/svm_wireframe.h
svm/svm_wavelength.h
svm/svm_gamma.h
svm/svm_brightness.h
svm/svm_geometry.h
svm/svm_gradient.h
svm/svm_hsv.h
svm/svm_image.h
svm/svm_invert.h
svm/svm_light_path.h
svm/svm_magic.h
svm/svm_mapping.h
svm/svm_math.h
svm/svm_mix.h
svm/svm_musgrave.h
svm/svm_noise.h
svm/svm_noisetex.h
svm/svm_normal.h
svm/svm_ramp.h
svm/svm_sepcomb_rgb.h
svm/svm_sky.h
svm/svm_tex_coord.h
svm/svm_texture.h
svm/svm_types.h
svm/svm_value.h
svm/svm_voronoi.h
svm/svm_wave.h
)
set(SRC_UTIL_HEADERS
../util/util_color.h
../util/util_math.h
../util/util_transform.h
../util/util_types.h
)
# CUDA module
if(WITH_CYCLES_CUDA_BINARIES)
# 32 bit or 64 bit
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
set(CUDA_BITS 64)
else()
set(CUDA_BITS 32)
endif()
# CUDA version
execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT})
string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT})
set(CUDA_VERSION "${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
# build for each arch
set(cuda_sources kernel.cu ${SRC_HEADERS} ${SRC_SVM_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_UTIL_HEADERS})
set(cuda_cubins)
foreach(arch ${CYCLES_CUDA_BINARIES_ARCH})
set(cuda_cubin kernel_${arch}.cubin)
set(cuda_version_flags "-D__KERNEL_CUDA_VERSION__=${CUDA_VERSION}")
# warn for other versions
if(CUDA_VERSION MATCHES "50")
else()
message(STATUS "CUDA version ${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} detected, build may succeed but only CUDA 5.0 is officially supported")
endif()
# build flags depending on CUDA version and arch
if(CUDA_VERSION LESS 50)
# CUDA 4.x
if(${arch} MATCHES "sm_1[0-9]")
# sm_1x
set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
elseif(${arch} MATCHES "sm_2[0-9]")
# sm_2x
set(cuda_arch_flags "--maxrregcount=24")
else()
# sm_3x
set(cuda_arch_flags "--maxrregcount=32")
endif()
set(cuda_math_flags "")
else()
# CUDA 5.x
if(${arch} MATCHES "sm_1[0-9]")
# sm_1x
set(cuda_arch_flags "--maxrregcount=24 --opencc-options -OPT:Olimit=0")
elseif(${arch} MATCHES "sm_2[0-9]")
# sm_2x
set(cuda_arch_flags "--maxrregcount=32")
else()
# sm_3x
set(cuda_arch_flags "--maxrregcount=32")
endif()
set(cuda_math_flags "--use_fast_math")
endif()
add_custom_command(
OUTPUT ${cuda_cubin}
COMMAND ${CUDA_NVCC_EXECUTABLE} -arch=${arch} -m${CUDA_BITS} --cubin ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cu -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_cubin} --ptxas-options="-v" ${cuda_arch_flags} ${cuda_version_flags} ${cuda_math_flags} -I${CMAKE_CURRENT_SOURCE_DIR}/../util -I${CMAKE_CURRENT_SOURCE_DIR}/svm -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -DNVCC
DEPENDS ${cuda_sources})
delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_cubin}" ${CYCLES_INSTALL_PATH}/lib)
list(APPEND cuda_cubins ${cuda_cubin})
endforeach()
add_custom_target(cycles_kernel_cuda ALL DEPENDS ${cuda_cubins})
endif()
# OSL module
if(WITH_CYCLES_OSL)
add_subdirectory(osl)
add_subdirectory(shaders)
endif()
# CPU module
include_directories(${INC})
include_directories(SYSTEM ${INC_SYS})
add_library(cycles_kernel ${SRC} ${SRC_HEADERS} ${SRC_CLOSURE_HEADERS} ${SRC_SVM_HEADERS})
if(WITH_CYCLES_OPTIMIZED_KERNEL)
set_source_files_properties(kernel_sse2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE2_KERNEL_FLAGS}")
set_source_files_properties(kernel_sse3.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE3_KERNEL_FLAGS}")
endif()
if(WITH_CYCLES_CUDA)
add_dependencies(cycles_kernel cycles_kernel_cuda)
endif()
# OpenCL kernel
#set(KERNEL_PREPROCESSED ${CMAKE_CURRENT_BINARY_DIR}/kernel_preprocessed.cl)
#add_custom_command(
# OUTPUT ${KERNEL_PREPROCESSED}
# COMMAND gcc -x c++ -E ${CMAKE_CURRENT_SOURCE_DIR}/kernel.cl -I ${CMAKE_CURRENT_SOURCE_DIR}/../util/ -DCCL_NAMESPACE_BEGIN= -DCCL_NAMESPACE_END= -o ${KERNEL_PREPROCESSED}
# DEPENDS ${SRC_KERNEL} ${SRC_UTIL_HEADERS})
#add_custom_target(cycles_kernel_preprocess ALL DEPENDS ${KERNEL_PREPROCESSED})
#delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${KERNEL_PREPROCESSED}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cl" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "kernel.cu" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_CLOSURE_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/closure)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_SVM_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel/svm)
delayed_install(${CMAKE_CURRENT_SOURCE_DIR} "${SRC_UTIL_HEADERS}" ${CYCLES_INSTALL_PATH}/kernel)