From 326d5bca032148e3d8107c1cf714adabb470fdeb Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 3 Apr 2025 16:24:04 +0200 Subject: [PATCH] Cycles: Support Decomposed MetalRT motion interpolation Currently MetalRT interpolates the transformation matrix on a per-element basis, which leads to issues like #135659. This change adds an implementation of decomposed (Scale/Rotate/Translate) motion interpolation, matching the behavior of BVH2 and other HW-RT. Using this interpolation requires macOS 15 and Xcode 16. On older platforms and compilers the old interpolation is used. Currently there are no changes for the user by default: the new interpolation is only available via the CYCLES_METALRT_PCMI environment variable. This is because there are some issues with complex motion paths that need to be looked into. Having the code available makes it easier to do further debugging. Ref #135659 Authored by Emma Liu Pull Request: https://projects.blender.org/blender/blender/pulls/136253 --- intern/cycles/device/metal/bvh.h | 3 + intern/cycles/device/metal/bvh.mm | 200 ++++++++++++++++------ intern/cycles/device/metal/device_impl.h | 1 + intern/cycles/device/metal/device_impl.mm | 11 ++ intern/cycles/util/debug.cpp | 10 +- intern/cycles/util/debug.h | 5 + 6 files changed, 178 insertions(+), 52 deletions(-) diff --git a/intern/cycles/device/metal/bvh.h b/intern/cycles/device/metal/bvh.h index ac206ba6d72..2aedaf36ccd 100644 --- a/intern/cycles/device/metal/bvh.h +++ b/intern/cycles/device/metal/bvh.h @@ -32,6 +32,9 @@ class BVHMetal : public BVH { bool motion_blur = false; + /* Per-component Motion Interpolation in macOS 15. 
*/ + bool use_pcmi = false; + bool build(Progress &progress, id device, id queue, bool refit); BVHMetal(const BVHParams &params, diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm index 178d6d49002..a009a649117 100644 --- a/intern/cycles/device/metal/bvh.mm +++ b/intern/cycles/device/metal/bvh.mm @@ -178,11 +178,6 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress, return false; } - /*------------------------------------------------*/ - BVH_status( - "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str()); - /*------------------------------------------------*/ - const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas(); const array &verts = mesh->get_verts(); @@ -249,6 +244,11 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress, geomDescMotion.opaque = true; geomDesc = geomDescMotion; + + BVH_status("Building motion mesh BLAS | %7d tris | %s | %7d motion keyframes", + (int)mesh->num_triangles(), + geom->name.c_str(), + (int)num_motion_steps); } else { MTLAccelerationStructureTriangleGeometryDescriptor *geomDescNoMotion = @@ -264,6 +264,9 @@ bool BVHMetal::build_BLAS_mesh(Progress &progress, geomDescNoMotion.opaque = true; geomDesc = geomDescNoMotion; + + BVH_status( + "Building mesh BLAS | %7d tris | %s", (int)mesh->num_triangles(), geom->name.c_str()); } /* Force a single any-hit call, so shadow record-all behavior works correctly */ @@ -388,11 +391,6 @@ bool BVHMetal::build_BLAS_hair(Progress &progress, return false; } - /*------------------------------------------------*/ - BVH_status( - "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str()); - /*------------------------------------------------*/ - const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas(); size_t num_motion_steps = 1; @@ -406,7 +404,7 @@ bool BVHMetal::build_BLAS_hair(Progress &progress, id idxBuffer = nil; 
MTLAccelerationStructureGeometryDescriptor *geomDesc; - if (motion_blur) { + if (num_motion_steps > 1) { MTLAccelerationStructureMotionCurveGeometryDescriptor *geomDescCrv = [MTLAccelerationStructureMotionCurveGeometryDescriptor descriptor]; @@ -587,12 +585,21 @@ bool BVHMetal::build_BLAS_hair(Progress &progress, [MTLPrimitiveAccelerationStructureDescriptor descriptor]; accelDesc.geometryDescriptors = @[ geomDesc ]; - if (motion_blur) { + if (num_motion_steps > 1) { accelDesc.motionStartTime = 0.0f; accelDesc.motionEndTime = 1.0f; accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish; accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish; accelDesc.motionKeyframeCount = num_motion_steps; + + BVH_status("Building motion hair BLAS | %7d curves | %s | %7d motion keyframes", + (int)hair->num_curves(), + geom->name.c_str(), + (int)num_motion_steps); + } + else { + BVH_status( + "Building hair BLAS | %7d curves | %s", (int)hair->num_curves(), geom->name.c_str()); } if (!use_fast_trace_bvh) { @@ -708,12 +715,6 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress, return false; } - /*------------------------------------------------*/ - BVH_status("Building pointcloud BLAS | %7d points | %s", - (int)pointcloud->num_points(), - geom->name.c_str()); - /*------------------------------------------------*/ - const size_t num_points = pointcloud->get_points().size(); const float3 *points = pointcloud->get_points().data(); const float *radius = pointcloud->get_radius().data(); @@ -766,7 +767,7 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress, } MTLAccelerationStructureGeometryDescriptor *geomDesc; - if (motion_blur) { + if (num_motion_steps > 1) { std::vector aabb_ptrs; aabb_ptrs.reserve(num_motion_steps); for (size_t step = 0; step < num_motion_steps; ++step) { @@ -812,12 +813,22 @@ bool BVHMetal::build_BLAS_pointcloud(Progress &progress, [MTLPrimitiveAccelerationStructureDescriptor descriptor]; accelDesc.geometryDescriptors = @[ geomDesc ]; - if 
(motion_blur) { + if (num_motion_steps > 1) { accelDesc.motionStartTime = 0.0f; accelDesc.motionEndTime = 1.0f; // accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish; // accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish; accelDesc.motionKeyframeCount = num_motion_steps; + + BVH_status("Building motion pointcloud BLAS | %7d points | %s | %7d motion keyframes", + (int)pointcloud->num_points(), + geom->name.c_str(), + (int)num_motion_steps); + } + else { + BVH_status("Building pointcloud BLAS | %7d points | %s", + (int)pointcloud->num_points(), + geom->name.c_str()); } accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits; @@ -933,6 +944,34 @@ bool BVHMetal::build_BLAS(Progress &progress, return false; } +# if defined(MAC_OS_VERSION_15_0) + +/* Return MTLComponentTransform from a DecomposedTransform. */ +static MTLComponentTransform decomposed_to_component_transform(const DecomposedTransform &src) +{ + MTLComponentTransform tfm; + tfm.scale = MTLPackedFloat3Make(src.y.w, src.z.w, src.w.w); + tfm.shear = MTLPackedFloat3Make(src.z.x, src.z.y, src.w.x); + tfm.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f); + tfm.rotation = MTLPackedFloatQuaternionMake(src.x.x, src.x.y, src.x.z, src.x.w); + tfm.translation = MTLPackedFloat3Make(src.y.x, src.y.y, src.y.z); + return tfm; +} + +/* Return unit MTLComponentTransform. 
*/ +static MTLComponentTransform component_transform_make_unit() +{ + MTLComponentTransform tfm; + tfm.scale = MTLPackedFloat3Make(1.0f, 1.0f, 1.0f); + tfm.shear = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f); + tfm.pivot = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f); + tfm.rotation = MTLPackedFloatQuaternionMake(0.0f, 0.0f, 0.0f, 1.0f); + tfm.translation = MTLPackedFloat3Make(0.0f, 0.0f, 0.0f); + return tfm; +} + +# endif + bool BVHMetal::build_TLAS(Progress &progress, id mtl_device, id queue, @@ -996,11 +1035,13 @@ bool BVHMetal::build_TLAS(Progress &progress, uint32_t num_instances = 0; uint32_t num_motion_transforms = 0; + uint32_t num_motion_instances = 0; for (Object *ob : objects) { num_instances++; if (ob->use_motion()) { num_motion_transforms += max((size_t)1, ob->get_motion().size()); + num_motion_instances++; } else { num_motion_transforms++; @@ -1011,10 +1052,7 @@ bool BVHMetal::build_TLAS(Progress &progress, return false; } - /*------------------------------------------------*/ - BVH_status("Building TLAS | %7d instances", (int)num_instances); - /*------------------------------------------------*/ - + const bool use_instance_motion = motion_blur && num_motion_instances; const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC) || !support_refit_blas(); NSMutableArray *all_blas = [NSMutableArray array]; @@ -1035,7 +1073,7 @@ bool BVHMetal::build_TLAS(Progress &progress, }; size_t instance_size; - if (motion_blur) { + if (use_instance_motion) { instance_size = sizeof(MTLAccelerationStructureMotionInstanceDescriptor); } else { @@ -1046,12 +1084,28 @@ bool BVHMetal::build_TLAS(Progress &progress, id instanceBuf = [mtl_device newBufferWithLength:num_instances * instance_size options:MTLResourceStorageModeShared]; id motion_transforms_buf = nil; - MTLPackedFloat4x3 *motion_transforms = nullptr; - if (motion_blur && num_motion_transforms) { - motion_transforms_buf = [mtl_device - newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3) - 
options:MTLResourceStorageModeShared]; - motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents; + MTLPackedFloat4x3 *matrix_motion_transforms = nullptr; +# if defined(MAC_OS_VERSION_15_0) + MTLComponentTransform *decomposed_motion_transforms = nullptr; +# endif + if (use_instance_motion && num_motion_transforms) { +# if defined(MAC_OS_VERSION_15_0) + if (use_pcmi) { + if (@available(macos 15.0, *)) { + motion_transforms_buf = [mtl_device + newBufferWithLength:num_motion_transforms * sizeof(MTLComponentTransform) + options:MTLResourceStorageModeShared]; + decomposed_motion_transforms = (MTLComponentTransform *)motion_transforms_buf.contents; + } + } + else +# endif + { + motion_transforms_buf = [mtl_device + newBufferWithLength:num_motion_transforms * sizeof(MTLPackedFloat4x3) + options:MTLResourceStorageModeShared]; + matrix_motion_transforms = (MTLPackedFloat4x3 *)motion_transforms_buf.contents; + } } uint32_t instance_index = 0; @@ -1115,7 +1169,7 @@ bool BVHMetal::build_TLAS(Progress &progress, } /* Bake into the appropriate descriptor */ - if (motion_blur) { + if (use_instance_motion) { MTLAccelerationStructureMotionInstanceDescriptor *instances = (MTLAccelerationStructureMotionInstanceDescriptor *)[instanceBuf contents]; MTLAccelerationStructureMotionInstanceDescriptor &desc = instances[currIndex]; @@ -1130,34 +1184,64 @@ bool BVHMetal::build_TLAS(Progress &progress, desc.motionEndBorderMode = MTLMotionBorderModeVanish; desc.intersectionFunctionTableOffset = 0; + array decomp(ob->get_motion().size()); + transform_motion_decompose( + decomp.data(), ob->get_motion().data(), ob->get_motion().size()); + int key_count = ob->get_motion().size(); if (key_count) { desc.motionTransformsCount = key_count; - Transform *keys = ob->get_motion().data(); - for (int i = 0; i < key_count; i++) { - float *t = (float *)&motion_transforms[motion_transform_index++]; - /* Transpose transform */ - const auto *src = (const float *)&keys[i]; - for (int i = 0; i < 12; 
i++) { - t[i] = src[(i / 3) + 4 * (i % 3)]; +# if defined(MAC_OS_VERSION_15_0) + if (use_pcmi) { + for (int i = 0; i < key_count; i++) { + decomposed_motion_transforms[motion_transform_index++] = + decomposed_to_component_transform(decomp[i]); + } + } + else +# endif + { + Transform *keys = ob->get_motion().data(); + for (int i = 0; i < key_count; i++) { + float *t = (float *)&matrix_motion_transforms[motion_transform_index++]; + /* Transpose transform */ + const auto *src = (const float *)&keys[i]; + for (int i = 0; i < 12; i++) { + t[i] = src[(i / 3) + 4 * (i % 3)]; + } } } } else { desc.motionTransformsCount = 1; - float *t = (float *)&motion_transforms[motion_transform_index++]; - if (ob->get_geometry()->is_instanced()) { - /* Transpose transform */ - const auto *src = (const float *)&ob->get_tfm(); - for (int i = 0; i < 12; i++) { - t[i] = src[(i / 3) + 4 * (i % 3)]; +# if defined(MAC_OS_VERSION_15_0) + if (use_pcmi) { + if (ob->get_geometry()->is_instanced()) { + decomposed_motion_transforms[motion_transform_index++] = + decomposed_to_component_transform(decomp[0]); + } + else { + decomposed_motion_transforms[motion_transform_index++] = + component_transform_make_unit(); } } - else { - /* Clear transform to identity matrix */ - t[0] = t[4] = t[8] = 1.0f; + else +# endif + { + float *t = (float *)&matrix_motion_transforms[motion_transform_index++]; + if (ob->get_geometry()->is_instanced()) { + /* Transpose transform */ + const auto *src = (const float *)&ob->get_tfm(); + for (int i = 0; i < 12; i++) { + t[i] = src[(i / 3) + 4 * (i % 3)]; + } + } + else { + /* Clear transform to identity matrix */ + t[0] = t[4] = t[8] = 1.0f; + } } } } @@ -1187,6 +1271,18 @@ bool BVHMetal::build_TLAS(Progress &progress, } } + if (use_instance_motion) { + BVH_status( + "Building motion TLAS | %7d instances | %7d motion instances | %7d motion " + "transforms", + (int)num_instances, + (int)num_motion_instances, + (int)num_motion_transforms); + } + else { + BVH_status("Building 
TLAS | %7d instances", (int)num_instances); + } + MTLInstanceAccelerationStructureDescriptor *accelDesc = [MTLInstanceAccelerationStructureDescriptor descriptor]; accelDesc.instanceCount = num_instances; @@ -1196,10 +1292,16 @@ bool BVHMetal::build_TLAS(Progress &progress, accelDesc.instanceDescriptorStride = instance_size; accelDesc.instancedAccelerationStructures = all_blas; - if (motion_blur) { + if (use_instance_motion) { accelDesc.instanceDescriptorType = MTLAccelerationStructureInstanceDescriptorTypeMotion; accelDesc.motionTransformBuffer = motion_transforms_buf; accelDesc.motionTransformCount = num_motion_transforms; +# if defined(MAC_OS_VERSION_15_0) + if (@available(macos 15.0, *)) { + accelDesc.motionTransformStride = 0; + accelDesc.motionTransformType = use_pcmi ? MTLTransformTypeComponent : MTLTransformTypePackedFloat4x3; + } +# endif } accelDesc.usage |= MTLAccelerationStructureUsageExtendedLimits; diff --git a/intern/cycles/device/metal/device_impl.h b/intern/cycles/device/metal/device_impl.h index f6e4b0ffc4d..a5f02a05dc7 100644 --- a/intern/cycles/device/metal/device_impl.h +++ b/intern/cycles/device/metal/device_impl.h @@ -42,6 +42,7 @@ class MetalDevice : public Device { /* MetalRT members ----------------------------------*/ bool use_metalrt = false; bool motion_blur = false; + bool use_pcmi = false; id mtlASArgEncoder = nil; /* encoder used for fetching device pointers from MTLAccelerationStructure */ diff --git a/intern/cycles/device/metal/device_impl.mm b/intern/cycles/device/metal/device_impl.mm index 91e0d721d31..b5b3e4d6261 100644 --- a/intern/cycles/device/metal/device_impl.mm +++ b/intern/cycles/device/metal/device_impl.mm @@ -99,6 +99,16 @@ MetalDevice::MetalDevice(const DeviceInfo &info, Stats &stats, Profiler &profile use_metalrt = (atoi(metalrt) != 0); } +# if defined(MAC_OS_VERSION_15_0) + /* Use "Ray tracing with per component motion interpolation" if available. 
+ * Requires Apple9 support (https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf). */ + if (use_metalrt && [mtlDevice supportsFamily:MTLGPUFamilyApple9]) { + if (@available(macos 15.0, *)) { + use_pcmi = DebugFlags().metal.use_metalrt_pcmi; + } + } +# endif + if (getenv("CYCLES_DEBUG_METAL_CAPTURE_KERNEL")) { capture_enabled = true; } @@ -1383,6 +1393,7 @@ void MetalDevice::build_bvh(BVH *bvh, Progress &progress, bool refit) BVHMetal *bvh_metal = static_cast<BVHMetal *>(bvh); bvh_metal->motion_blur = motion_blur; + bvh_metal->use_pcmi = use_pcmi; if (bvh_metal->build(progress, mtlDevice, mtlGeneralCommandQueue, refit)) { if (bvh->params.top_level) { diff --git a/intern/cycles/util/debug.cpp b/intern/cycles/util/debug.cpp index a15eecd5bfa..b853214b348 100644 --- a/intern/cycles/util/debug.cpp +++ b/intern/cycles/util/debug.cpp @@ -69,17 +69,21 @@ void DebugFlags::Metal::reset() adaptive_compile = true; } - if (auto *str = getenv("CYCLES_METAL_LOCAL_ATOMIC_SORT")) { + if (const char *str = getenv("CYCLES_METAL_LOCAL_ATOMIC_SORT")) { use_local_atomic_sort = (atoi(str) != 0); } - if (auto *str = getenv("CYCLES_METAL_NANOVDB")) { + if (const char *str = getenv("CYCLES_METAL_NANOVDB")) { use_nanovdb = (atoi(str) != 0); } - if (auto *str = getenv("CYCLES_METAL_ASYNC_PSO_CREATION")) { + if (const char *str = getenv("CYCLES_METAL_ASYNC_PSO_CREATION")) { use_async_pso_creation = (atoi(str) != 0); } + + if (const char *str = getenv("CYCLES_METALRT_PCMI")) { + use_metalrt_pcmi = (atoi(str) != 0); + } } DebugFlags::OptiX::OptiX() diff --git a/intern/cycles/util/debug.h b/intern/cycles/util/debug.h index bf374ecb74b..6e9032f7945 100644 --- a/intern/cycles/util/debug.h +++ b/intern/cycles/util/debug.h @@ -100,6 +100,11 @@ class DebugFlags { /* Whether async PSO creation is enabled or not. */ bool use_async_pso_creation = true; + + /* Whether to use per-component motion interpolation. + * TODO: Enable by default when "multi step velocity motion blur" fail is fixed. 
+ */ + bool use_metalrt_pcmi = false; }; /* Get instance of debug flags registry. */