diff --git a/intern/cycles/device/metal/bvh.h b/intern/cycles/device/metal/bvh.h
index cbc5ca7d2c3..58d71e3928f 100644
--- a/intern/cycles/device/metal/bvh.h
+++ b/intern/cycles/device/metal/bvh.h
@@ -58,6 +58,11 @@ class BVHMetal : public BVH {
                        id<MTLCommandQueue> queue,
                        Geometry *const geom,
                        bool refit);
+  bool build_BLAS_pointcloud(Progress &progress,
+                             id<MTLDevice> device,
+                             id<MTLCommandQueue> queue,
+                             Geometry *const geom,
+                             bool refit);
   bool build_TLAS(Progress &progress, id<MTLDevice> device, id<MTLCommandQueue> queue, bool refit);
 };
 
diff --git a/intern/cycles/device/metal/bvh.mm b/intern/cycles/device/metal/bvh.mm
index 1953102cb41..8b252f1a5ec 100644
--- a/intern/cycles/device/metal/bvh.mm
+++ b/intern/cycles/device/metal/bvh.mm
@@ -19,6 +19,7 @@
 #  include "scene/hair.h"
 #  include "scene/mesh.h"
 #  include "scene/object.h"
+#  include "scene/pointcloud.h"
 
 #  include "util/progress.h"
 
@@ -475,6 +476,227 @@ bool BVHMetal::build_BLAS_hair(Progress &progress,
   return false;
 }
 
+/* Build (or refit) the bottom-level acceleration structure of a point cloud.
+ *
+ * Every point is handed to Metal as one axis-aligned bounding box per motion step; the boxes
+ * are bound to intersection function table slot 2.
+ *
+ * Returns true once the build has been committed to `queue`. The build finishes
+ * asynchronously: `accel_struct_building` is set here and cleared by the completion handler.
+ * Returns false when the point cloud is empty or macOS 12 is not available. */
+bool BVHMetal::build_BLAS_pointcloud(Progress &progress,
+                                     id<MTLDevice> device,
+                                     id<MTLCommandQueue> queue,
+                                     Geometry *const geom,
+                                     bool refit)
+{
+  if (@available(macos 12.0, *)) {
+    /* Build BLAS for point cloud */
+    PointCloud *pointcloud = static_cast<PointCloud *>(geom);
+    if (pointcloud->num_points() == 0) {
+      return false;
+    }
+
+    /*------------------------------------------------*/
+    BVH_status("Building pointcloud BLAS | %7d points | %s",
+               (int)pointcloud->num_points(),
+               geom->name.c_str());
+    /*------------------------------------------------*/
+
+    const size_t num_points = pointcloud->get_points().size();
+    const float3 *points = pointcloud->get_points().data();
+    const float *radius = pointcloud->get_radius().data();
+
+    const bool use_fast_trace_bvh = (params.bvh_type == BVH_TYPE_STATIC);
+
+    size_t num_motion_steps = 1;
+    Attribute *motion_keys = pointcloud->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION);
+    if (motion_blur && pointcloud->get_use_motion_blur() && motion_keys) {
+      num_motion_steps = pointcloud->get_motion_steps();
+    }
+
+    /* One bounding box per point and per motion step: the allocation below, the fill loop
+     * (index = step * num_points + j) and the keyframe offsets all assume this size. */
+    const size_t num_aabbs = num_motion_steps * num_points;
+
+    MTLResourceOptions storage_mode;
+    if (device.hasUnifiedMemory) {
+      storage_mode = MTLResourceStorageModeShared;
+    }
+    else {
+      storage_mode = MTLResourceStorageModeManaged;
+    }
+
+    /* Allocate a GPU buffer for the AABB data and populate it */
+    id<MTLBuffer> aabbBuf = [device
+        newBufferWithLength:num_aabbs * sizeof(MTLAxisAlignedBoundingBox)
+                    options:storage_mode];
+    MTLAxisAlignedBoundingBox *aabb_data = (MTLAxisAlignedBoundingBox *)[aabbBuf contents];
+
+    /* Get AABBs for each motion step */
+    size_t center_step = (num_motion_steps - 1) / 2;
+    for (size_t step = 0; step < num_motion_steps; ++step) {
+      /* The center step for motion vertices is not stored in the attribute, so it is read
+       * from the point cloud's own positions. The pointer is re-selected on every step so
+       * that a motion attribute pointer from an earlier step is never reused for it. */
+      const float3 *step_points = points;
+      if (step != center_step) {
+        size_t attr_offset = (step > center_step) ? step - 1 : step;
+        step_points = motion_keys->data_float3() + attr_offset * num_points;
+      }
+
+      for (size_t j = 0; j < num_points; ++j) {
+        const PointCloud::Point point = pointcloud->get_point(j);
+        BoundBox bounds = BoundBox::empty;
+        point.bounds_grow(step_points, radius, bounds);
+
+        const size_t index = step * num_points + j;
+        aabb_data[index].min = (MTLPackedFloat3 &)bounds.min;
+        aabb_data[index].max = (MTLPackedFloat3 &)bounds.max;
+      }
+    }
+
+    if (storage_mode == MTLResourceStorageModeManaged) {
+      [aabbBuf didModifyRange:NSMakeRange(0, aabbBuf.length)];
+    }
+
+    MTLAccelerationStructureGeometryDescriptor *geomDesc;
+    if (motion_blur) {
+      std::vector<MTLMotionKeyframeData *> aabb_ptrs;
+      aabb_ptrs.reserve(num_motion_steps);
+      for (size_t step = 0; step < num_motion_steps; ++step) {
+        MTLMotionKeyframeData *k = [MTLMotionKeyframeData data];
+        k.buffer = aabbBuf;
+        k.offset = step * num_points * sizeof(MTLAxisAlignedBoundingBox);
+        aabb_ptrs.push_back(k);
+      }
+
+      MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor *geomDescMotion =
+          [MTLAccelerationStructureMotionBoundingBoxGeometryDescriptor descriptor];
+      geomDescMotion.boundingBoxBuffers = [NSArray arrayWithObjects:aabb_ptrs.data()
+                                                              count:aabb_ptrs.size()];
+      geomDescMotion.boundingBoxCount = num_points;
+      geomDescMotion.boundingBoxStride = sizeof(aabb_data[0]);
+      geomDescMotion.intersectionFunctionTableOffset = 2;
+
+      /* Force a single any-hit call, so shadow record-all behavior works correctly */
+      /* (Match optix behavior: unsigned int build_flags =
+       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
+      geomDescMotion.allowDuplicateIntersectionFunctionInvocation = false;
+      geomDescMotion.opaque = true;
+      geomDesc = geomDescMotion;
+    }
+    else {
+      MTLAccelerationStructureBoundingBoxGeometryDescriptor *geomDescNoMotion =
+          [MTLAccelerationStructureBoundingBoxGeometryDescriptor descriptor];
+      geomDescNoMotion.boundingBoxBuffer = aabbBuf;
+      geomDescNoMotion.boundingBoxBufferOffset = 0;
+      geomDescNoMotion.boundingBoxCount = int(num_aabbs);
+      geomDescNoMotion.boundingBoxStride = sizeof(aabb_data[0]);
+      geomDescNoMotion.intersectionFunctionTableOffset = 2;
+
+      /* Force a single any-hit call, so shadow record-all behavior works correctly */
+      /* (Match optix behavior: unsigned int build_flags =
+       * OPTIX_GEOMETRY_FLAG_REQUIRE_SINGLE_ANYHIT_CALL;) */
+      geomDescNoMotion.allowDuplicateIntersectionFunctionInvocation = false;
+      geomDescNoMotion.opaque = true;
+      geomDesc = geomDescNoMotion;
+    }
+
+    MTLPrimitiveAccelerationStructureDescriptor *accelDesc =
+        [MTLPrimitiveAccelerationStructureDescriptor descriptor];
+    accelDesc.geometryDescriptors = @[ geomDesc ];
+
+    if (motion_blur) {
+      accelDesc.motionStartTime = 0.0f;
+      accelDesc.motionEndTime = 1.0f;
+      accelDesc.motionStartBorderMode = MTLMotionBorderModeVanish;
+      accelDesc.motionEndBorderMode = MTLMotionBorderModeVanish;
+      accelDesc.motionKeyframeCount = num_motion_steps;
+    }
+
+    if (!use_fast_trace_bvh) {
+      accelDesc.usage |= (MTLAccelerationStructureUsageRefit |
+                          MTLAccelerationStructureUsagePreferFastBuild);
+    }
+
+    MTLAccelerationStructureSizes accelSizes = [device
+        accelerationStructureSizesWithDescriptor:accelDesc];
+    id<MTLAccelerationStructure> accel_uncompressed = [device
+        newAccelerationStructureWithSize:accelSizes.accelerationStructureSize];
+    id<MTLBuffer> scratchBuf = [device newBufferWithLength:accelSizes.buildScratchBufferSize
+                                                   options:MTLResourceStorageModePrivate];
+    /* 8 bytes: receives the compacted size as a MTLDataTypeULong, read back as uint64_t. */
+    id<MTLBuffer> sizeBuf = [device newBufferWithLength:8 options:MTLResourceStorageModeShared];
+    id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
+    id<MTLAccelerationStructureCommandEncoder> accelEnc =
+        [accelCommands accelerationStructureCommandEncoder];
+    if (refit) {
+      [accelEnc refitAccelerationStructure:accel_struct
+                                descriptor:accelDesc
+                               destination:accel_uncompressed
+                             scratchBuffer:scratchBuf
+                       scratchBufferOffset:0];
+    }
+    else {
+      [accelEnc buildAccelerationStructure:accel_uncompressed
+                                descriptor:accelDesc
+                             scratchBuffer:scratchBuf
+                       scratchBufferOffset:0];
+    }
+    if (use_fast_trace_bvh) {
+      [accelEnc writeCompactedAccelerationStructureSize:accel_uncompressed
+                                               toBuffer:sizeBuf
+                                                 offset:0
+                                           sizeDataType:MTLDataTypeULong];
+    }
+    [accelEnc endEncoding];
+    [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
+      /* free temp resources */
+      [scratchBuf release];
+      [aabbBuf release];
+
+      if (use_fast_trace_bvh) {
+        /* Compact the accel structure */
+        uint64_t compressed_size = *(uint64_t *)sizeBuf.contents;
+
+        dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
+          id<MTLCommandBuffer> accelCommands = [queue commandBuffer];
+          id<MTLAccelerationStructureCommandEncoder> accelEnc =
+              [accelCommands accelerationStructureCommandEncoder];
+          id<MTLAccelerationStructure> accel = [device
+              newAccelerationStructureWithSize:compressed_size];
+          [accelEnc copyAndCompactAccelerationStructure:accel_uncompressed
+                                toAccelerationStructure:accel];
+          [accelEnc endEncoding];
+          [accelCommands addCompletedHandler:^(id<MTLCommandBuffer> command_buffer) {
+            uint64_t allocated_size = [accel allocatedSize];
+            stats.mem_alloc(allocated_size);
+            accel_struct = accel;
+            [accel_uncompressed release];
+            accel_struct_building = false;
+          }];
+          [accelCommands commit];
+        });
+      }
+      else {
+        /* set our acceleration structure to the uncompressed structure */
+        accel_struct = accel_uncompressed;
+
+        uint64_t allocated_size = [accel_struct allocatedSize];
+        stats.mem_alloc(allocated_size);
+        accel_struct_building = false;
+      }
+      [sizeBuf release];
+    }];
+
+    accel_struct_building = true;
+    [accelCommands commit];
+    return true;
+  }
+  return false;
+}
+
 bool BVHMetal::build_BLAS(Progress &progress,
                           id<MTLDevice> device,
                           id<MTLCommandQueue> queue,
@@ -491,6 +713,8 @@ bool BVHMetal::build_BLAS(Progress &progress,
       return build_BLAS_mesh(progress, device, queue, geom, refit);
     case Geometry::HAIR:
       return build_BLAS_hair(progress, device, queue, geom, refit);
+    case Geometry::POINTCLOUD:
+      return build_BLAS_pointcloud(progress, device, queue, geom, refit);
     default:
       return false;
   }
diff --git a/intern/cycles/device/metal/kernel.h b/intern/cycles/device/metal/kernel.h
index 4874af1bfa6..a4bfb30436d 100644
--- a/intern/cycles/device/metal/kernel.h
+++ b/intern/cycles/device/metal/kernel.h
@@ -36,6 +36,8 @@ enum {
   METALRT_FUNC_CURVE_RIBBON_SHADOW,
   METALRT_FUNC_CURVE_ALL,
   METALRT_FUNC_CURVE_ALL_SHADOW,
+  METALRT_FUNC_POINT,
+  METALRT_FUNC_POINT_SHADOW,
   METALRT_FUNC_NUM
 };
 
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index f948a8a0a0f..e9bd1cea5df 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -358,6 +358,8 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
         "__intersection__curve_ribbon_shadow",
         "__intersection__curve_all",
         "__intersection__curve_all_shadow",
+        "__intersection__point",
+        "__intersection__point_shadow",
     };
 
     assert(sizeof(function_names) / sizeof(function_names[0]) == METALRT_FUNC_NUM);
@@ -400,36 +402,50 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
     NSArray *function_list = nil;
 
     if (device->use_metalrt) {
-      id<MTLFunction> box_intersect_default = nil;
-      id<MTLFunction> box_intersect_shadow = nil;
+      id<MTLFunction> curve_intersect_default = nil;
+      id<MTLFunction> curve_intersect_shadow = nil;
+      id<MTLFunction> point_intersect_default = nil;
+      id<MTLFunction> point_intersect_shadow = nil;
       if (device->kernel_features & KERNEL_FEATURE_HAIR) {
         /* Add curve intersection programs. */
         if (device->kernel_features & KERNEL_FEATURE_HAIR_THICK) {
           /* Slower programs for thick hair since that also slows down ribbons.
            * Ideally this should not be needed. */
-          box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
-          box_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
+          curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL];
+          curve_intersect_shadow =
+              rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_ALL_SHADOW];
         }
         else {
-          box_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
-          box_intersect_shadow =
+          curve_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON];
+          curve_intersect_shadow =
               rt_intersection_funcs[kernel_type][METALRT_FUNC_CURVE_RIBBON_SHADOW];
         }
       }
+      if (device->kernel_features & KERNEL_FEATURE_POINTCLOUD) {
+        point_intersect_default = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT];
+        point_intersect_shadow = rt_intersection_funcs[kernel_type][METALRT_FUNC_POINT_SHADOW];
+      }
       table_functions[METALRT_TABLE_DEFAULT] = [NSArray
           arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_TRI],
-                           box_intersect_default ?
-                               box_intersect_default :
+                           curve_intersect_default ?
+                               curve_intersect_default :
+                               rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
+                           point_intersect_default ?
+                               point_intersect_default :
                                rt_intersection_funcs[kernel_type][METALRT_FUNC_DEFAULT_BOX],
                            nil];
       table_functions[METALRT_TABLE_SHADOW] = [NSArray
           arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_TRI],
-                           box_intersect_shadow ?
-                               box_intersect_shadow :
+                           curve_intersect_shadow ?
+                               curve_intersect_shadow :
+                               rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
+                           point_intersect_shadow ?
+                               point_intersect_shadow :
                                rt_intersection_funcs[kernel_type][METALRT_FUNC_SHADOW_BOX],
                            nil];
       table_functions[METALRT_TABLE_LOCAL] = [NSArray
           arrayWithObjects:rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_TRI],
+                           rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
                            rt_intersection_funcs[kernel_type][METALRT_FUNC_LOCAL_BOX],
                            nil];
 
diff --git a/intern/cycles/kernel/device/metal/kernel.metal b/intern/cycles/kernel/device/metal/kernel.metal
index deb7dafe55e..3303b541487 100644
--- a/intern/cycles/kernel/device/metal/kernel.metal
+++ b/intern/cycles/kernel/device/metal/kernel.metal
@@ -576,6 +576,156 @@ __intersection__curve_all_shadow(constant KernelParamsMetal &launch_params_metal
 
   return result;
 }
-
 #endif /* __HAIR__ */
+
+#ifdef __POINTCLOUD__
+/* Test one point primitive against a ray and, when the hit is accepted, store it in the
+ * payload. `result` is left untouched on a miss or when the object fails the visibility mask. */
+ccl_device_inline
+void metalrt_intersection_point(constant KernelParamsMetal &launch_params_metal,
+                                ray_data MetalKernelContext::MetalRTIntersectionPayload &payload,
+                                const uint object,
+                                const uint prim,
+                                const uint type,
+                                const float3 ray_origin,
+                                const float3 ray_direction,
+                                float time,
+                                const float ray_tmax,
+                                thread BoundingBoxIntersectionResult &result)
+{
+#  ifdef __VISIBILITY_FLAG__
+  const uint visibility = payload.visibility;
+  if ((kernel_tex_fetch(__objects, object).visibility & visibility) == 0) {
+    return;
+  }
+#  endif
+
+  float3 P = ray_origin;
+  float3 dir = ray_direction;
+
+  /* The direction is not normalized by default, but the point intersection routine expects that */
+  float len;
+  dir = normalize_len(dir, &len);
+
+  Intersection isect;
+  isect.t = ray_tmax;
+  /* Transform maximum distance into object space. */
+  if (isect.t != FLT_MAX)
+    isect.t *= len;
+
+  MetalKernelContext context(launch_params_metal);
+  if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
+    result = metalrt_visibility_test(
+        launch_params_metal, payload, object, prim, isect.u);
+    if (result.accept) {
+      result.distance = isect.t / len;
+      payload.u = isect.u;
+      payload.v = isect.v;
+      payload.prim = prim;
+      payload.type = type;
+    }
+  }
+}
+
+/* Shadow-ray variant: forwards every point hit to metalrt_shadow_all_hit() and only accepts
+ * the intersection when that call asks to stop the search. */
+ccl_device_inline
+void metalrt_intersection_point_shadow(constant KernelParamsMetal &launch_params_metal,
+                                       ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload,
+                                       const uint object,
+                                       const uint prim,
+                                       const uint type,
+                                       const float3 ray_origin,
+                                       const float3 ray_direction,
+                                       float time,
+                                       const float ray_tmax,
+                                       thread BoundingBoxIntersectionResult &result)
+{
+  const uint visibility = payload.visibility;
+
+  float3 P = ray_origin;
+  float3 dir = ray_direction;
+
+  /* The direction is not normalized by default, but the point intersection routine expects that */
+  float len;
+  dir = normalize_len(dir, &len);
+
+  Intersection isect;
+  isect.t = ray_tmax;
+  /* Transform maximum distance into object space */
+  if (isect.t != FLT_MAX)
+    isect.t *= len;
+
+  MetalKernelContext context(launch_params_metal);
+  if (context.point_intersect(NULL, &isect, P, dir, isect.t, object, prim, time, type)) {
+    result.continue_search = metalrt_shadow_all_hit(
+        launch_params_metal, payload, object, prim, float2(isect.u, isect.v), ray_tmax);
+    result.accept = !result.continue_search;
+
+    if (result.accept) {
+      result.distance = isect.t / len;
+    }
+  }
+}
+
+/* Bounding-box intersection entry point for point primitives. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__intersection__point(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+                      ray_data MetalKernelContext::MetalRTIntersectionPayload &payload [[payload]],
+                      const uint object [[user_instance_id]],
+                      const uint primitive_id [[primitive_id]],
+                      const float3 ray_origin [[origin]],
+                      const float3 ray_direction [[direction]],
+                      const float ray_tmax [[max_distance]])
+{
+  const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+  const int type = kernel_tex_fetch(__objects, object).primitive_type;
+
+  BoundingBoxIntersectionResult result;
+  result.accept = false;
+  result.continue_search = true;
+  result.distance = ray_tmax;
+
+  metalrt_intersection_point(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
+#  if defined(__METALRT_MOTION__)
+                             payload.time,
+#  else
+                             0.0f,
+#  endif
+                             ray_tmax, result);
+
+  return result;
+}
+
+/* Bounding-box intersection entry point for point primitives hit by shadow rays. */
+[[intersection(bounding_box, triangle_data, METALRT_TAGS)]]
+BoundingBoxIntersectionResult
+__intersection__point_shadow(constant KernelParamsMetal &launch_params_metal [[buffer(1)]],
+                             ray_data MetalKernelContext::MetalRTIntersectionShadowPayload &payload [[payload]],
+                             const uint object [[user_instance_id]],
+                             const uint primitive_id [[primitive_id]],
+                             const float3 ray_origin [[origin]],
+                             const float3 ray_direction [[direction]],
+                             const float ray_tmax [[max_distance]])
+{
+  const uint prim = primitive_id + kernel_tex_fetch(__object_prim_offset, object);
+  const int type = kernel_tex_fetch(__objects, object).primitive_type;
+
+  BoundingBoxIntersectionResult result;
+  result.accept = false;
+  result.continue_search = true;
+  result.distance = ray_tmax;
+
+  metalrt_intersection_point_shadow(launch_params_metal, payload, object, prim, type, ray_origin, ray_direction,
+#  if defined(__METALRT_MOTION__)
+                                    payload.time,
+#  else
+                                    0.0f,
+#  endif
+                                    ray_tmax, result);
+
+  return result;
+}
+#endif /* __POINTCLOUD__ */
 
 #endif /* __METALRT__ */