diff --git a/intern/cycles/cmake/zstd_compress.cpp b/intern/cycles/cmake/zstd_compress.cpp
new file mode 100644
index 00000000000..b2e64568ea9
--- /dev/null
+++ b/intern/cycles/cmake/zstd_compress.cpp
@@ -0,0 +1,88 @@
+/* SPDX-FileCopyrightText: 2024 Blender Foundation
+ *
+ * SPDX-License-Identifier: Apache-2.0 */
+
+/* Build-time helper that compresses one file with Zstd.
+ *
+ * Usage: zstd_compress <input> <output>
+ *
+ * Returns 0 on success and -1 on any failure, printing the reason to stderr. */
+
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <vector>
+
+#include <zstd.h>
+
+/* Compression level passed to ZSTD_compress(). */
+static const int COMPRESSION_LEVEL = 19;
+
+int main(int argc, const char **argv)
+{
+  if (argc < 3) {
+    std::cerr << "Usage: zstd_compress <input> <output>\n";
+    return -1;
+  }
+
+  const char *in_path = argv[1];
+  const char *out_path = argv[2];
+
+  /* TODO: This might fail for non-ASCII paths on Windows... */
+  std::ifstream in(in_path, std::ios_base::binary);
+  if (!in) {
+    std::cerr << "zstd_compress: cannot open input '" << in_path << "'\n";
+    return -1;
+  }
+
+  /* tellg() reports failure by returning -1, which must not be converted into a huge size. */
+  in.seekg(0, std::ios_base::end);
+  const std::streamoff in_end = in.tellg();
+  in.seekg(0, std::ios_base::beg);
+  if (!in || in_end < 0) {
+    std::cerr << "zstd_compress: cannot determine size of '" << in_path << "'\n";
+    return -1;
+  }
+  const size_t in_size = static_cast<size_t>(in_end);
+
+  std::vector<char> in_data(in_size);
+  in.read(in_data.data(), static_cast<std::streamsize>(in_size));
+  if (!in) {
+    std::cerr << "zstd_compress: cannot read '" << in_path << "'\n";
+    return -1;
+  }
+
+  size_t out_size = ZSTD_compressBound(in_size);
+  if (ZSTD_isError(out_size)) {
+    std::cerr << "zstd_compress: " << ZSTD_getErrorName(out_size) << "\n";
+    return -1;
+  }
+  std::vector<char> out_data(out_size);
+
+  out_size = ZSTD_compress(
+      out_data.data(), out_data.size(), in_data.data(), in_data.size(), COMPRESSION_LEVEL);
+  if (ZSTD_isError(out_size)) {
+    std::cerr << "zstd_compress: " << ZSTD_getErrorName(out_size) << "\n";
+    return -1;
+  }
+
+  /* Open the output only now, so that the failures above never create or truncate it. */
+  std::ofstream out(out_path, std::ios_base::binary);
+  if (!out) {
+    std::cerr << "zstd_compress: cannot open output '" << out_path << "'\n";
+    return -1;
+  }
+
+  out.write(out_data.data(), static_cast<std::streamsize>(out_size));
+  /* Close explicitly: data may still be buffered, and a flush error inside the destructor
+   * would go unnoticed and be reported as success. */
+  out.close();
+  if (!out) {
+    std::cerr << "zstd_compress: cannot write '" << out_path << "'\n";
+    /* Remove the incomplete file so it cannot be mistaken for a valid build output. */
+    std::remove(out_path);
+    return -1;
+  }
+
+  return 0;
+}
diff --git a/intern/cycles/device/cuda/device_impl.cpp b/intern/cycles/device/cuda/device_impl.cpp
index 5e1d8142202..cc412539636 100644
--- a/intern/cycles/device/cuda/device_impl.cpp
+++ b/intern/cycles/device/cuda/device_impl.cpp
@@ -256,7 +256,7 @@ string CUDADevice::compile_kernel(const string &common_cflags, /* Attempt to use kernel provided with Blender. 
*/ if (!use_adaptive_compilation()) { if (!force_ptx) { - const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin", name, major, minor)); + const string cubin = path_get(string_printf("lib/%s_sm_%d%d.cubin.zst", name, major, minor)); VLOG_INFO << "Testing for pre-compiled kernel " << cubin << "."; if (path_exists(cubin)) { VLOG_INFO << "Using precompiled kernel."; @@ -268,7 +268,7 @@ string CUDADevice::compile_kernel(const string &common_cflags, int ptx_major = major, ptx_minor = minor; while (ptx_major >= 3) { const string ptx = path_get( - string_printf("lib/%s_compute_%d%d.ptx", name, ptx_major, ptx_minor)); + string_printf("lib/%s_compute_%d%d.ptx.zst", name, ptx_major, ptx_minor)); VLOG_INFO << "Testing for pre-compiled kernel " << ptx << "."; if (path_exists(ptx)) { VLOG_INFO << "Using precompiled kernel."; @@ -440,7 +440,7 @@ bool CUDADevice::load_kernels(const uint kernel_features) string cubin_data; CUresult result; - if (path_read_text(cubin, cubin_data)) { + if (path_read_compressed_text(cubin, cubin_data)) { result = cuModuleLoadData(&cuModule, cubin_data.c_str()); } else { diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 3679275fdd8..47e0a4a0d54 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -231,7 +231,7 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c /* Attempt to use kernel provided with Blender. 
*/ if (!use_adaptive_compilation()) { - const string fatbin = path_get(string_printf("lib/%s_%s.fatbin", name, arch.c_str())); + const string fatbin = path_get(string_printf("lib/%s_%s.fatbin.zst", name, arch.c_str())); VLOG_INFO << "Testing for pre-compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { VLOG_INFO << "Using precompiled kernel."; @@ -387,7 +387,7 @@ bool HIPDevice::load_kernels(const uint kernel_features) string fatbin_data; hipError_t result; - if (path_read_text(fatbin, fatbin_data)) + if (path_read_compressed_text(fatbin, fatbin_data)) result = hipModuleLoadData(&hipModule, fatbin_data.c_str()); else result = hipErrorFileNotFound; diff --git a/intern/cycles/device/hiprt/device_impl.cpp b/intern/cycles/device/hiprt/device_impl.cpp index bfca220e1f0..c326ef3259c 100644 --- a/intern/cycles/device/hiprt/device_impl.cpp +++ b/intern/cycles/device/hiprt/device_impl.cpp @@ -141,7 +141,7 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, const std::string arch = hipDeviceArch(hipDevId); if (!use_adaptive_compilation()) { - const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb", name)); + const string fatbin = path_get(string_printf("lib/%s_rt_gfx.hipfb.zst", name)); VLOG(1) << "Testing for pre-compiled kernel " << fatbin << "."; if (path_exists(fatbin)) { VLOG(1) << "Using precompiled kernel."; @@ -309,8 +309,7 @@ bool HIPRTDevice::load_kernels(const uint kernel_features) string fatbin_data; hipError_t result; - if (path_read_text(fatbin, fatbin_data)) { - + if (path_read_compressed_text(fatbin, fatbin_data)) { result = hipModuleLoadData(&hipModule, fatbin_data.c_str()); } else diff --git a/intern/cycles/device/optix/device_impl.cpp b/intern/cycles/device/optix/device_impl.cpp index a22daf168d2..4c83b6c04bb 100644 --- a/intern/cycles/device/optix/device_impl.cpp +++ b/intern/cycles/device/optix/device_impl.cpp @@ -216,7 +216,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) ""; string 
ptx_filename; if (need_optix_kernels) { - ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx"); + ptx_filename = path_get("lib/kernel_optix" + suffix + ".ptx.zst"); if (use_adaptive_compilation() || path_file_size(ptx_filename) == -1) { std::string optix_include_dir = get_optix_include_dir(); if (optix_include_dir.empty()) { @@ -348,7 +348,7 @@ bool OptiXDevice::load_kernels(const uint kernel_features) string cflags = compile_kernel_get_common_cflags(kernel_features); ptx_filename = compile_kernel(cflags, ("kernel" + suffix).c_str(), "optix", true); } - if (ptx_filename.empty() || !path_read_text(ptx_filename, ptx_data)) { + if (ptx_filename.empty() || !path_read_compressed_text(ptx_filename, ptx_data)) { set_error(string_printf("Failed to load OptiX kernel from '%s'", ptx_filename.c_str())); return false; } @@ -798,8 +798,8 @@ bool OptiXDevice::load_osl_kernels() osl_modules.resize(osl_kernels.size() + 1); { /* Load and compile PTX module with OSL services. */ - string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx"); - if (!path_read_text(ptx_filename, ptx_data)) { + string ptx_data, ptx_filename = path_get("lib/kernel_optix_osl_services.ptx.zst"); + if (!path_read_compressed_text(ptx_filename, ptx_data)) { set_error(string_printf("Failed to load OptiX OSL services kernel from '%s'", ptx_filename.c_str())); return false; diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 28b1ceda344..56fb1fafd98 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -416,6 +416,11 @@ set(LIB ) +# Zstd compressor for kernels +add_executable(zstd_compress ../cmake/zstd_compress.cpp) +target_include_directories(zstd_compress SYSTEM PRIVATE ${ZSTD_INCLUDE_DIRS}) +target_link_libraries(zstd_compress ${ZSTD_LIBRARIES} ${PTHREADS_LIBRARIES}) + # CUDA module if(WITH_CYCLES_CUDA_BINARIES) @@ -455,6 +460,7 @@ if(WITH_CYCLES_CUDA_BINARIES) set(format "cubin") endif() set(cuda_file 
${name}_${arch}.${format}) + set(cuda_file_compressed ${cuda_file}.zst) set(kernel_sources ${sources}) if(NOT ${prev_arch} STREQUAL "none") @@ -517,9 +523,14 @@ if(WITH_CYCLES_CUDA_BINARIES) DEPENDS ${kernel_sources}) endif() + add_custom_command( + OUTPUT ${cuda_file_compressed} + COMMAND "$" ${cuda_file} ${cuda_file_compressed} + DEPENDS ${cuda_file}) + unset(_cuda_nvcc_args) - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND cuda_cubins ${cuda_file}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${cuda_file_compressed}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND cuda_cubins ${cuda_file_compressed}) unset(cuda_debug_flags) endmacro() @@ -603,6 +614,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) macro(CYCLES_HIP_KERNEL_ADD arch name flags sources experimental) set(format "fatbin") set(hip_file ${name}_${arch}.${format}) + set(hip_file_compressed ${hip_file}.zst) set(kernel_sources ${sources}) set(hip_kernel_src "/device/hip/${name}.cpp") @@ -657,8 +669,12 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) OUTPUT ${hip_file} COMMAND ${hip_command} ${hip_flags} DEPENDS ${kernel_sources}) - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file}" ${CYCLES_INSTALL_PATH}/lib) - list(APPEND hip_fatbins ${hip_file}) + add_custom_command( + OUTPUT ${hip_file_compressed} + COMMAND "$" ${hip_file} ${hip_file_compressed} + DEPENDS ${hip_file}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hip_file_compressed}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND hip_fatbins ${hip_file_compressed}) endmacro() foreach(arch ${CYCLES_HIP_BINARIES_ARCH}) @@ -680,6 +696,7 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES) ${SRC_UTIL_HEADERS}) set(bitcode_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.bc) set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_gfx.hipfb) + set(hiprt_file_compressed ${hiprt_file}.zst) set(kernel_sources ${hiprt_sources}) set(hiprt_kernel_src "/device/hiprt/kernel.cpp") 
if(WIN32) @@ -744,8 +761,12 @@ if(WITH_CYCLES_DEVICE_HIPRT AND WITH_CYCLES_HIP_BINARIES) OUTPUT ${hiprt_file} COMMAND ${hiprt_link_command} ${hiprt_link_flags} DEPENDS ${bitcode_file}) - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file}" ${CYCLES_INSTALL_PATH}/lib) - add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file}) + add_custom_command( + OUTPUT ${hiprt_file_compressed} + COMMAND "$" ${hiprt_file} ${hiprt_file_compressed} + DEPENDS ${hiprt_file}) + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${hiprt_file_compressed}" ${CYCLES_INSTALL_PATH}/lib) + add_custom_target(cycles_kernel_hiprt ALL DEPENDS ${hiprt_file_compressed}) cycles_set_solution_folder(cycles_kernel_hiprt) endif() @@ -754,6 +775,7 @@ endif() if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) macro(cycles_optix_kernel_add name input flags) set(output "${CMAKE_CURRENT_BINARY_DIR}/${name}.ptx") + set(output_compressed "${output}.zst") set(cuda_flags ${flags} -I "${OPTIX_INCLUDE_DIR}" @@ -795,9 +817,14 @@ if(WITH_CYCLES_DEVICE_OPTIX AND WITH_CYCLES_CUDA_BINARIES) WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}") - list(APPEND optix_ptx ${output}) + add_custom_command( + OUTPUT ${output_compressed} + COMMAND "$" ${output} ${output_compressed} + DEPENDS ${output}) - delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output}" ${CYCLES_INSTALL_PATH}/lib) + list(APPEND optix_ptx ${output_compressed}) + + delayed_install("${CMAKE_CURRENT_BINARY_DIR}" "${output_compressed}" ${CYCLES_INSTALL_PATH}/lib) endmacro() cycles_optix_kernel_add( diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 2f854e3d69a..6eaa619da25 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -7,6 +7,7 @@ set(INC ) set(INC_SYS + ${ZSTD_INCLUDE_DIRS} ) set(SRC @@ -32,6 +33,7 @@ set(SRC set(LIB ${TBB_LIBRARIES} + ${ZSTD_LIBRARIES} ) set(SRC_HEADERS diff --git a/intern/cycles/util/path.cpp b/intern/cycles/util/path.cpp index 
373b33f838d..2ae37ed01f4 100644
--- a/intern/cycles/util/path.cpp
+++ b/intern/cycles/util/path.cpp
@@ -19,6 +19,8 @@ OIIO_NAMESPACE_USING #include +#include + #if defined(_WIN32) # define DIR_SEP '\\' # define DIR_SEP_ALT '/'
@@ -704,6 +706,50 @@ bool path_read_binary(const string &path, vector &binary) return true; }
+/* Read a file that may be Zstd-compressed into `binary`.
+ *
+ * Paths that do not end in ".zst" are forwarded to path_read_binary() unchanged. For ".zst"
+ * paths the file must hold a Zstd frame that records its decompressed size.
+ * Returns false if the file cannot be read or decompressed. */
+bool path_read_compressed_binary(const string &path, vector<uint8_t> &binary)
+{
+  if (!string_endswith(path, ".zst")) {
+    return path_read_binary(path, binary);
+  }
+
+  vector<uint8_t> compressed;
+  if (!path_read_binary(path, compressed)) {
+    return false;
+  }
+
+  /* Keep the 64-bit type returned by Zstd: the sentinel values compared against below are
+   * 64-bit constants and would never match after truncation to a 32-bit size_t. */
+  const unsigned long long full_size = ZSTD_getFrameContentSize(compressed.data(),
+                                                                compressed.size());
+
+  if (full_size == ZSTD_CONTENTSIZE_ERROR) {
+    /* Potentially corrupted file? */
+    return false;
+  }
+  if (full_size == ZSTD_CONTENTSIZE_UNKNOWN) {
+    /* Technically this is an optional field, but we can expect it to be set for now.
+     * Otherwise we'd need streaming decompression and repeated resizing of the vector. */
+    return false;
+  }
+  if (full_size > binary.max_size()) {
+    /* Announced size cannot be held in memory, do not attempt to allocate it. */
+    return false;
+  }
+
+  binary.resize(static_cast<size_t>(full_size));
+
+  const size_t result = ZSTD_decompress(
+      binary.data(), binary.size(), compressed.data(), compressed.size());
+
+  /* Success requires that exactly the announced number of bytes was produced. */
+  return !ZSTD_isError(result) && result == binary.size();
+}
+
 bool path_read_text(const string &path, string &text) { vector binary;
@@ -719,6 +765,25 @@ bool path_read_text(const string &path, string &text) return true; }
+/* Read a file that may be Zstd-compressed into `text`.
+ *
+ * Same as path_read_compressed_binary(), with the bytes stored in a string. Returns false if
+ * the path does not exist or reading fails. */
+bool path_read_compressed_text(const string &path, string &text)
+{
+  vector<uint8_t> binary;
+
+  if (!path_exists(path) || !path_read_compressed_binary(path, binary)) {
+    return false;
+  }
+
+  /* Use data() instead of &binary[0]: indexing an empty vector is undefined behavior, and an
+   * empty file is a valid (if unusual) input. */
+  text.assign(reinterpret_cast<const char *>(binary.data()), binary.size());
+
+  return true;
+}
+
 uint64_t path_modified_time(const string &path) { path_stat_t st;
diff --git a/intern/cycles/util/path.h b/intern/cycles/util/path.h
index e34c852a4ef..15c00d01faa 100644
--- a/intern/cycles/util/path.h
+++ b/intern/cycles/util/path.h
@@ -50,6 +50,9 @@ bool path_write_text(const string &path, string &text); bool path_read_binary(const string &path, 
vector &binary); bool path_read_text(const string &path, string &text); +bool path_read_compressed_binary(const string &path, vector &binary); +bool path_read_compressed_text(const string &path, string &text); + /* File manipulation. */ bool path_remove(const string &path);