diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 46f08996a72..34735dc57e8 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -463,6 +463,41 @@ endif() # CUDA module +function(cuda_add_common_flags cuda_version in_flags out_flags) + set(flags ${in_flags}) + + if(CUDA_HOST_COMPILER) + set(flags ${flags} -ccbin="${CUDA_HOST_COMPILER}") + endif() + + set(flags ${flags} + # Helps with compatibility when using recent clang host compiler. + "-std=c++17" + --use_fast_math + -Wno-deprecated-gpu-targets) + + if(WITH_CYCLES_DEBUG) + set(flags ${flags} + -D WITH_CYCLES_DEBUG + --ptxas-options="-v") + endif() + + if(WITH_NANOVDB) + set(flags ${flags} -D WITH_NANOVDB) + endif() + + if(NOT WITH_CYCLES_CUDA_BUILD_SERIAL AND "${cuda_version}" GREATER_EQUAL 129) + # Only use split compile with few binaries, to avoid excessive memory usage. + # This is mainly helpful for quick local builds for one architecture. + list(LENGTH CYCLES_CUDA_BINARIES_ARCH _num_binaries) + if(_num_binaries LESS_EQUAL 2) + set(flags ${flags} --split-compile=0) + endif() + endif() + + set(${out_flags} ${flags} PARENT_SCOPE) +endfunction() + if(WITH_CYCLES_CUDA_BINARIES) # 64 bit only set(CUDA_BITS 64) @@ -520,33 +555,9 @@ if(WITH_CYCLES_CUDA_BINARIES) -m ${CUDA_BITS} -I ${CMAKE_CURRENT_SOURCE_DIR}/.. -I ${CMAKE_CURRENT_SOURCE_DIR}/device/cuda - --use_fast_math - -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file} - -Wno-deprecated-gpu-targets) + -o ${CMAKE_CURRENT_BINARY_DIR}/${cuda_file}) - if(CUDA_HOST_COMPILER) - set(cuda_flags ${cuda_flags} - -ccbin="${CUDA_HOST_COMPILER}") - endif() - - if(WITH_NANOVDB) - set(cuda_flags ${cuda_flags} - -D WITH_NANOVDB) - endif() - - if(WITH_CYCLES_DEBUG) - set(cuda_flags ${cuda_flags} - -D WITH_CYCLES_DEBUG - --ptxas-options="-v") - endif() - - if(NOT WITH_CYCLES_CUDA_BUILD_SERIAL AND "${cuda_version}" GREATER_EQUAL 129) - set(cuda_flags ${cuda_flags} - --split-compile=0) - endif() - - # Helps with compatibility when using recent clang host compiler. - set(cuda_flags ${cuda_flags} -std=c++17) + cuda_add_common_flags(${cuda_version} "${cuda_flags}" cuda_flags) set(_cuda_nvcc_args -arch=${arch} @@ -832,30 +843,13 @@ if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_DEVICE_OPTIX) -I "${OPTIX_INCLUDE_DIR}" -I "${CMAKE_CURRENT_SOURCE_DIR}/.." -I "${CMAKE_CURRENT_SOURCE_DIR}/device/cuda" - --use_fast_math - -Wno-deprecated-gpu-targets -o ${output}) - if(CUDA_HOST_COMPILER) - set(cuda_flags ${cuda_flags} - -ccbin="${CUDA_HOST_COMPILER}") - endif() - - if(WITH_NANOVDB) - set(cuda_flags ${cuda_flags} - -D WITH_NANOVDB) - endif() - if(WITH_CYCLES_OSL) set(cuda_flags ${cuda_flags} -D OSL_LIBRARY_VERSION_CODE=${OSL_LIBRARY_VERSION_CODE}) endif() - if(WITH_CYCLES_DEBUG) - set(cuda_flags ${cuda_flags} - -D WITH_CYCLES_DEBUG) - endif() - set(arch compute_50) set(cuda_nvcc_executable ${CUDA_NVCC_EXECUTABLE}) set(cuda_version ${CUDA_VERSION}) @@ -869,10 +863,7 @@ if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_DEVICE_OPTIX) endif() endif() - if(NOT WITH_CYCLES_CUDA_BUILD_SERIAL AND "${cuda_version}" GREATER_EQUAL 129) - set(cuda_flags ${cuda_flags} - --split-compile=0) - endif() + cuda_add_common_flags(${cuda_version} "${cuda_flags}" cuda_flags) add_custom_command( OUTPUT @@ -887,7 +878,6 @@ if(WITH_CYCLES_CUDA_BINARIES AND WITH_CYCLES_DEVICE_OPTIX) COMMAND ${cuda_nvcc_executable} --ptx - -std=c++17 -arch=${arch} ${cuda_flags} ${input} diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index af3093befcc..e3e533876aa 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -298,14 +298,21 @@ ccl_device void volume_voxel_get(KernelGlobals kg, ccl_private OctreeTracing &oc /* If there exists a Light Path Node, it could affect the density evaluation at runtime. * Randomly sample a few points on the ray to estimate the extrema. */ template -ccl_device_noinline Extrema volume_estimate_extrema(KernelGlobals kg, - const ccl_private Ray *ccl_restrict ray, - ccl_private ShaderData *ccl_restrict sd, - const IntegratorGenericState state, - const ccl_private RNGState *rng_state, - const uint32_t path_flag, - const Interval t, - const VolumeStack entry) +/* Work around apparent HIP compiler bug. */ +# ifdef __KERNEL_HIP__ +ccl_device +# else +ccl_device_noinline +# endif + Extrema + volume_estimate_extrema(KernelGlobals kg, + const ccl_private Ray *ccl_restrict ray, + ccl_private ShaderData *ccl_restrict sd, + const IntegratorGenericState state, + const ccl_private RNGState *rng_state, + const uint32_t path_flag, + const Interval t, + const VolumeStack entry) { const bool homogeneous = volume_is_homogeneous(kg, entry); const int samples = homogeneous ? 1 : 4; diff --git a/tests/python/eevee_render_tests.py b/tests/python/eevee_render_tests.py index 9d36bd809e3..b0bc97a3688 100644 --- a/tests/python/eevee_render_tests.py +++ b/tests/python/eevee_render_tests.py @@ -170,26 +170,6 @@ if inside_blender: sys.exit(1) -def get_gpu_device_type(blender): - # TODO: This always fails. - command = [ - blender, - "--background", - "--factory-startup", - "--python", - str(pathlib.Path(__file__).parent / "gpu_info.py") - ] - try: - completed_process = subprocess.run(command, stdout=subprocess.PIPE) - for line in completed_process.stdout.read_text(): - if line.startswith("GPU_DEVICE_TYPE:"): - vendor = line.split(':')[1] - return vendor - except Exception: - return None - return None - - def get_arguments(filepath, output_filepath, gpu_backend): arguments = [ "--background", @@ -230,11 +210,6 @@ def main(): parser = create_argparse() args = parser.parse_args() - gpu_device_type = get_gpu_device_type(args.blender) - reference_override_dir = None - if gpu_device_type == "AMD": - reference_override_dir = "eevee_renders/amd" - blocklist = BLOCKLIST if args.gpu_backend == "metal": blocklist += BLOCKLIST_METAL @@ -249,7 +224,6 @@ def main(): report.set_pixelated(True) report.set_reference_dir("eevee_renders") - report.set_reference_override_dir(reference_override_dir) test_dir_name = Path(args.testdir).name if test_dir_name.startswith('image_mapping'): diff --git a/tests/python/gpu_info.py b/tests/python/modules/gpu_info.py similarity index 100% rename from tests/python/gpu_info.py rename to tests/python/modules/gpu_info.py diff --git a/tests/python/modules/render_report.py b/tests/python/modules/render_report.py index 4d888c9371a..2a163bd49a9 100644 --- a/tests/python/modules/render_report.py +++ b/tests/python/modules/render_report.py @@ -174,6 +174,25 @@ def diff_output(test, oiiotool, fail_threshold, fail_percent, verbose, update): return test +def get_gpu_device_vendor(blender): + command = [ + blender, + "--background", + "--factory-startup", + "--python", + str(pathlib.Path(__file__).parent / "gpu_info.py") + ] + try: + completed_process = subprocess.run(command, stdout=subprocess.PIPE, universal_newlines=True) + for line in completed_process.stdout.splitlines(): + if line.startswith("GPU_DEVICE_TYPE:"): + vendor = line.split(':')[1].upper() + return vendor + except Exception: + return None + return None + + class Report: __slots__ = ( 'title', diff --git a/tests/python/storm_render_tests.py b/tests/python/storm_render_tests.py index d27c40cb94f..87facf20897 100644 --- a/tests/python/storm_render_tests.py +++ b/tests/python/storm_render_tests.py @@ -59,6 +59,25 @@ BLOCKLIST_METAL = [ "autosmooth_custom_normals.blend", ] +# AMD seems to have similar limitations as Metal for transparency. +BLOCKLIST_AMD = BLOCKLIST_METAL + [ + "musgrave_.*_multifractal.*.blend", + "noise_lacunarity.blend", +] + +# Minor difference in texture coordinate for white noise hash. +BLOCKLIST_INTEL = [ + "autosmooth_custom_normals.blend", + "hair_reflection.blend", + "hair_transmission.blend", + "principled_bsdf_emission.blend", + "principled_bsdf_sheen.blend", + "musgrave_.*_multifractal.*.blend", + "noise_lacunarity.blend", + "sss_hair.blend", + "white_noise.*.blend", +] + def setup(): import bpy @@ -120,7 +139,16 @@ def main(): from modules import render_report - blocklist = BLOCKLIST_METAL if sys.platform == "darwin" else [] + if sys.platform == "darwin": + blocklist = BLOCKLIST_METAL + else: + gpu_vendor = render_report.get_gpu_device_vendor(args.blender) + if gpu_vendor == "AMD": + blocklist = BLOCKLIST_AMD + elif gpu_vendor == "INTEL": + blocklist = BLOCKLIST_INTEL + else: + blocklist = [] if args.export_method == 'HYDRA': report = render_report.Report("Storm Hydra", args.outdir, args.oiiotool, blocklist=blocklist + BLOCKLIST_HYDRA)